From 147b589f623fb3bd96ba5217cab0428e46692e30 Mon Sep 17 00:00:00 2001
From: vllm-ascend-ci
Date: Wed, 15 Apr 2026 15:27:09 +0800
Subject: [PATCH] [v0.18.0][Doc] Translated Doc files 2026-04-14 (#8257)

## Auto-Translation Summary

Translated **102** file(s):

- docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po
- docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po
- docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po
- docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po
- docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/patch.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po
- docs/source/locale/zh_CN/LC_MESSAGES/faqs.po
- docs/source/locale/zh_CN/LC_MESSAGES/index.po
- docs/source/locale/zh_CN/LC_MESSAGES/installation.po
- docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ACL_Graph.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/add_custom_aclnn_op.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/context_parallel.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/npugraph_ex.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/quantization.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po
- docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/index.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_colocated_mooncake_multi_instance.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/ray.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/suffix_speculative_decoding.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/310p.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/index.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-R1.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.1.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.2.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM4.x.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM5.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2-Thinking.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/MiniMax-M2.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/PaddleOCR-VL.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen-VL-Dense.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-7B.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-235B-A22B.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-30B-A3B.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-32B-W4A4.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-8B-W4A8.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Coder-30B-A3B.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Next.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-235B-A22B-Instruct.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-30B-A3B-Instruct.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Embedding.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Reranker.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-27B.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_embedding.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_reranker.po
- docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/index.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/batch_invariance.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/cpu_binding.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lmcache_ascend_deployment.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/npugraph_ex.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sequence_parallelism.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po
- docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/weight_prefetch.po

---

[Workflow run](https://github.com/vllm-project/vllm-ascend/actions/runs/24390263284)

Signed-off-by: vllm-ascend-ci
Co-authored-by: vllm-ascend-ci
---
 .../LC_MESSAGES/community/contributors.po | 7887 +++++++++++++----
 .../zh_CN/LC_MESSAGES/community/governance.po | 214 +-
 .../community/user_stories/index.po | 118 +-
 .../community/user_stories/llamafactory.po | 86 +-
 .../community/versioning_policy.po | 1326 ++-
 .../Design_Documents/ACL_Graph.po | 283 +
 .../Design_Documents/KV_Cache_Pool_Guide.po | 309 +
 .../ModelRunner_prepare_inputs.po | 629 ++
 .../Design_Documents/add_custom_aclnn_op.po | 84 +
 .../Design_Documents/context_parallel.po | 391 +
 .../Design_Documents/cpu_binding.po | 814 ++
 .../Design_Documents/disaggregated_prefill.po | 360 +
 .../Design_Documents/eplb_swift_balancer.po | 467 +
 .../Design_Documents/npugraph_ex.po | 220 +
 .../developer_guide/Design_Documents/patch.po | 261 +-
 .../Design_Documents/quantization.po | 359 +
 .../developer_guide/contribution/index.po | 168 +-
 .../contribution/multi_node_test.po | 222 +
 .../developer_guide/contribution/testing.po | 301 +-
 .../evaluation/using_ais_bench.po | 238 +
 .../evaluation/using_evalscope.po | 116 +-
 .../evaluation/using_lm_eval.po | 131 +-
 .../evaluation/using_opencompass.po | 73 +-
 .../performance_and_debug/msprobe_guide.po | 592 +-
 .../optimization_and_tuning.po | 348 +
 .../performance_benchmark.po | 347 +-
 .../service_profiling_guide.po | 920 +-
 docs/source/locale/zh_CN/LC_MESSAGES/faqs.po | 919 +-
 docs/source/locale/zh_CN/LC_MESSAGES/index.po | 65 +-
 .../locale/zh_CN/LC_MESSAGES/installation.po | 537 +-
 .../locale/zh_CN/LC_MESSAGES/quick_start.po | 152 +-
 .../LC_MESSAGES/tutorials/features/index.po | 29 +
 ...ng_sequence_context_parallel_multi_node.po | 447 +
 ...g_sequence_context_parallel_single_node.po | 386 +
 .../pd_colocated_mooncake_multi_instance.po | 509 ++
 .../pd_disaggregation_mooncake_multi_node.po | 471 +
 .../pd_disaggregation_mooncake_single_node.po | 213 +
 .../LC_MESSAGES/tutorials/features/ray.po | 219 +
 .../features/suffix_speculative_decoding.po | 854 ++
 .../LC_MESSAGES/tutorials/hardwares/310p.po | 142 +
.../LC_MESSAGES/tutorials/hardwares/index.po | 29 + .../tutorials/models/DeepSeek-R1.po | 364 + .../tutorials/models/DeepSeek-V3.1.po | 608 ++ .../tutorials/models/DeepSeek-V3.2.po | 396 + .../LC_MESSAGES/tutorials/models/GLM4.x.po | 528 ++ .../LC_MESSAGES/tutorials/models/GLM5.po | 475 + .../tutorials/models/Kimi-K2-Thinking.po | 134 + .../LC_MESSAGES/tutorials/models/Kimi-K2.5.po | 582 ++ .../tutorials/models/MiniMax-M2.po | 574 ++ .../tutorials/models/PaddleOCR-VL.po | 265 + .../tutorials/models/Qwen-VL-Dense.po | 360 + .../tutorials/models/Qwen2.5-7B.po | 279 + .../tutorials/models/Qwen2.5-Omni.po | 301 + .../tutorials/models/Qwen3-235B-A22B.po | 665 ++ .../tutorials/models/Qwen3-30B-A3B.po | 67 + .../tutorials/models/Qwen3-32B-W4A4.po | 88 + .../tutorials/models/Qwen3-8B-W4A8.po | 72 + .../tutorials/models/Qwen3-Coder-30B-A3B.po | 210 + .../tutorials/models/Qwen3-Dense.po | 866 ++ .../tutorials/models/Qwen3-Next.po | 269 + .../models/Qwen3-Omni-30B-A3B-Thinking.po | 230 + .../models/Qwen3-VL-235B-A22B-Instruct.po | 433 + .../models/Qwen3-VL-30B-A3B-Instruct.po | 199 + .../tutorials/models/Qwen3-VL-Embedding.po | 172 + .../tutorials/models/Qwen3-VL-Reranker.po | 190 + .../tutorials/models/Qwen3.5-27B.po | 402 + .../tutorials/models/Qwen3.5-397B-A17B.po | 542 ++ .../tutorials/models/Qwen3_embedding.po | 158 + .../tutorials/models/Qwen3_reranker.po | 165 + .../LC_MESSAGES/tutorials/models/index.po | 29 + .../configuration/additional_config.po | 567 +- .../user_guide/deployment_guide/index.po | 25 + .../deployment_guide/using_volcano_kthena.po | 293 + .../feature_guide/Fine_grained_TP.po | 307 + .../feature_guide/Multi_Token_Prediction.po | 233 + .../feature_guide/batch_invariance.po | 214 + .../feature_guide/context_parallel.po | 299 + .../user_guide/feature_guide/cpu_binding.po | 284 + .../user_guide/feature_guide/dynamic_batch.po | 108 + .../feature_guide/epd_disaggregation.po | 237 + .../feature_guide/eplb_swift_balancer.po | 247 + .../user_guide/feature_guide/external_dp.po | 164 + .../user_guide/feature_guide/graph_mode.po | 133 +- .../user_guide/feature_guide/kv_pool.po | 644 ++ .../feature_guide/large_scale_ep.po | 477 + .../feature_guide/layer_sharding.po | 185 + .../lmcache_ascend_deployment.po | 100 + .../user_guide/feature_guide/lora.po | 105 +- .../user_guide/feature_guide/netloader.po | 341 + .../user_guide/feature_guide/npugraph_ex.po | 61 + .../user_guide/feature_guide/quantization.po | 283 +- .../user_guide/feature_guide/rfork.po | 386 + .../feature_guide/sequence_parallelism.po | 435 + .../user_guide/feature_guide/sleep_mode.po | 152 +- .../feature_guide/speculative_decoding.po | 164 + .../feature_guide/structured_output.po | 216 +- .../feature_guide/ucm_deployment.po | 219 + .../feature_guide/weight_prefetch.po | 171 + .../LC_MESSAGES/user_guide/release_notes.po | 7756 +++++++++++++--- .../user_guide/support_matrix/index.po | 17 +- .../support_matrix/supported_features.po | 454 +- .../support_matrix/supported_models.po | 647 +- 102 files changed, 41760 insertions(+), 6023 deletions(-) create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ACL_Graph.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/add_custom_aclnn_op.po create mode 100644 
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/context_parallel.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/npugraph_ex.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/quantization.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/index.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_colocated_mooncake_multi_instance.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/ray.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/suffix_speculative_decoding.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/310p.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/index.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-R1.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.1.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.2.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM4.x.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM5.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2-Thinking.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/MiniMax-M2.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/PaddleOCR-VL.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen-VL-Dense.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-7B.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-235B-A22B.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-30B-A3B.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-32B-W4A4.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-8B-W4A8.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Coder-30B-A3B.po create mode 100644 
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Next.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-235B-A22B-Instruct.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-30B-A3B-Instruct.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Embedding.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Reranker.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-27B.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_embedding.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_reranker.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/index.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/batch_invariance.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/cpu_binding.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lmcache_ascend_deployment.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/npugraph_ex.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sequence_parallelism.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po create mode 100644 docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/weight_prefetch.po diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po b/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po index 4f5b7bf8..212e4553 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +++ 
b/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po @@ -4,1605 +4,6384 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../community/contributors.md:1 -msgid "Maintainers and contributors" -msgstr "维护者和贡献者" +#: ../../source/community/contributors.md:1 +msgid "Committers and Contributors" +msgstr "提交者和贡献者" -#: ../../community/contributors.md:3 -msgid "Maintainers" -msgstr "维护者" +#: ../../source/community/contributors.md:3 +msgid "Committers" +msgstr "提交者" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "Name" -msgstr "名称" +msgstr "姓名" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "Github ID" -msgstr "Github 账号" +msgstr "GitHub ID" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "Date" msgstr "日期" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "Xiyuan Wang" msgstr "Xiyuan Wang" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "[@wangxiyuan](https://github.com/wangxiyuan)" msgstr "[@wangxiyuan](https://github.com/wangxiyuan)" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "2025/01" msgstr "2025/01" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "Yikun Jiang" msgstr "Yikun Jiang" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "[@Yikun](https://github.com/Yikun)" msgstr "[@Yikun](https://github.com/Yikun)" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "2025/02" msgstr "2025/02" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "Yi Gan" msgstr "Yi Gan" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "[@ganyi1996ppo](https://github.com/ganyi1996ppo)" msgstr "[@ganyi1996ppo](https://github.com/ganyi1996ppo)" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "Shoujian Zheng" msgstr "Shoujian Zheng" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "[@jianzs](https://github.com/jianzs)" msgstr "[@jianzs](https://github.com/jianzs)" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "2025/06" msgstr "2025/06" -#: ../../community/contributors.md:12 -msgid "Contributors" -msgstr "贡献者" +#: ../../source/community/contributors.md +msgid "Wengang Chen" +msgstr "Wengang Chen" -#: ../../community/contributors.md:14 -msgid "" -"vLLM Ascend every release would not have been possible without the following" -" contributors:" -msgstr "每个 vLLM Ascend 版本的发布都离不开以下贡献者:" - -#: ../../community/contributors.md:16 -msgid "Updated on 2025-06-10:" -msgstr "更新于 2025-06-10:" - -#: ../../community/contributors.md -msgid "Number" -msgstr "数字" - -#: ../../community/contributors.md -msgid "Contributor" 
-msgstr "贡献者" - -#: ../../community/contributors.md -msgid "Commit ID" -msgstr "提交 ID" - -#: ../../community/contributors.md -msgid "83" -msgstr "83" - -#: ../../community/contributors.md -msgid "[@ZhengWG](https://github.com/)" -msgstr "[@ZhengWG](https://github.com/)" - -#: ../../community/contributors.md -msgid "2025/7/7" -msgstr "2025/7/7" - -#: ../../community/contributors.md -msgid "" -"[3a469de](https://github.com/vllm-project/vllm-" -"ascend/commit/9c886d0a1f0fc011692090b0395d734c83a469de)" -msgstr "" -"[3a469de](https://github.com/vllm-project/vllm-" -"ascend/commit/9c886d0a1f0fc011692090b0395d734c83a469de)" - -#: ../../community/contributors.md -msgid "82" -msgstr "82" - -#: ../../community/contributors.md -msgid "[@wm901115nwpu](https://github.com/)" -msgstr "[@wm901115nwpu](https://github.com/)" - -#: ../../community/contributors.md -msgid "" -"[a2a47d4](https://github.com/vllm-project/vllm-" -"ascend/commit/f08c4f15a27f0f27132f4ca7a0c226bf0a2a47d4)" -msgstr "" -"[a2a47d4](https://github.com/vllm-project/vllm-" -"ascend/commit/f08c4f15a27f0f27132f4ca7a0c226bf0a2a47d4)" - -#: ../../community/contributors.md -msgid "81" -msgstr "81" - -#: ../../community/contributors.md -msgid "[@Agonixiaoxiao](https://github.com/)" -msgstr "[@Agonixiaoxiao](https://github.com/)" - -#: ../../community/contributors.md -msgid "2025/7/2" -msgstr "2025/7/2" - -#: ../../community/contributors.md -msgid "" -"[6f84576](https://github.com/vllm-project/vllm-" -"ascend/commit/7fc1a984890bd930f670deedcb2dda3a46f84576)" -msgstr "" -"[6f84576](https://github.com/vllm-project/vllm-" -"ascend/commit/7fc1a984890bd930f670deedcb2dda3a46f84576)" - -#: ../../community/contributors.md -msgid "80" -msgstr "80" - -#: ../../community/contributors.md -msgid "[@zhanghw0354](https://github.com/zhanghw0354)" -msgstr "[@zhanghw0354](https://github.com/zhanghw0354)" - -#: ../../community/contributors.md -msgid "" -"[d3df9a5](https://github.com/vllm-project/vllm-" -"ascend/commit/9fb3d558e5b57a3c97ee5e11b9f5dba6ad3df9a5)" -msgstr "" -"[d3df9a5](https://github.com/vllm-project/vllm-" -"ascend/commit/9fb3d558e5b57a3c97ee5e11b9f5dba6ad3df9a5)" - -#: ../../community/contributors.md -msgid "79" -msgstr "79" - -#: ../../community/contributors.md -msgid "[@GDzhu01](https://github.com/GDzhu01)" -msgstr "[@GDzhu01](https://github.com/GDzhu01)" - -#: ../../community/contributors.md -msgid "2025/6/28" -msgstr "2025/6/28" - -#: ../../community/contributors.md -msgid "" -"[de256ac](https://github.com/vllm-project/vllm-" -"ascend/commit/b308a7a25897b88d4a23a9e3d583f4ec6de256ac)" -msgstr "" -"[de256ac](https://github.com/vllm-project/vllm-" -"ascend/commit/b308a7a25897b88d4a23a9e3d583f4ec6de256ac)" - -#: ../../community/contributors.md -msgid "78" -msgstr "78" - -#: ../../community/contributors.md -msgid "[@leo-pony](https://github.com/leo-pony)" -msgstr "[@leo-pony](https://github.com/leo-pony)" - -#: ../../community/contributors.md -msgid "2025/6/26" -msgstr "2025/6/26" - -#: ../../community/contributors.md -msgid "" -"[3f2a5f2](https://github.com/vllm-project/vllm-" -"ascend/commit/10253449120307e3b45f99d82218ba53e3f2a5f2)" -msgstr "" -"[3f2a5f2](https://github.com/vllm-project/vllm-" -"ascend/commit/10253449120307e3b45f99d82218ba53e3f2a5f2)" - -#: ../../community/contributors.md -msgid "77" -msgstr "77" - -#: ../../community/contributors.md -msgid "[@zeshengzong](https://github.com/zeshengzong)" -msgstr "[@zeshengzong](https://github.com/zeshengzong)" - -#: ../../community/contributors.md -msgid "" 
-"[3ee25aa](https://github.com/vllm-project/vllm-" -"ascend/commit/192dbbcc6e244a8471d3c00033dc637233ee25aa)" -msgstr "" -"[3ee25aa](https://github.com/vllm-project/vllm-" -"ascend/commit/192dbbcc6e244a8471d3c00033dc637233ee25aa)" - -#: ../../community/contributors.md -msgid "76" -msgstr "76" - -#: ../../community/contributors.md -msgid "[@sharonyunyun](https://github.com/sharonyunyun)" -msgstr "[@sharonyunyun](https://github.com/sharonyunyun)" - -#: ../../community/contributors.md -msgid "2025/6/25" -msgstr "2025/6/25" - -#: ../../community/contributors.md -msgid "" -"[2dd8666](https://github.com/vllm-project/vllm-" -"ascend/commit/941269a6c5bbc79f6c1b6abd4680dc5802dd8666)" -msgstr "" -"[2dd8666](https://github.com/vllm-project/vllm-" -"ascend/commit/941269a6c5bbc79f6c1b6abd4680dc5802dd8666)" - -#: ../../community/contributors.md -msgid "75" -msgstr "75" - -#: ../../community/contributors.md -msgid "[@Pr0Wh1teGivee](https://github.com/Pr0Wh1teGivee)" -msgstr "[@Pr0Wh1teGivee](https://github.com/Pr0Wh1teGivee)" - -#: ../../community/contributors.md -msgid "" -"[c65dd40](https://github.com/vllm-project/vllm-" -"ascend/commit/2fda60464c287fe456b4a2f27e63996edc65dd40)" -msgstr "" -"[c65dd40](https://github.com/vllm-project/vllm-" -"ascend/commit/2fda60464c287fe456b4a2f27e63996edc65dd40)" - -#: ../../community/contributors.md -msgid "74" -msgstr "74" - -#: ../../community/contributors.md -msgid "[@xleoken](https://github.com/xleoken)" -msgstr "[@xleoken](https://github.com/xleoken)" - -#: ../../community/contributors.md -msgid "2025/6/23" -msgstr "2025/6/23" - -#: ../../community/contributors.md -msgid "" -"[c604de0](https://github.com/vllm-project/vllm-" -"ascend/commit/4447e53d7ad5edcda978ca6b0a3a26a73c604de0)" -msgstr "" -"[c604de0](https://github.com/vllm-project/vllm-" -"ascend/commit/4447e53d7ad5edcda978ca6b0a3a26a73c604de0)" - -#: ../../community/contributors.md -msgid "73" -msgstr "73" - -#: ../../community/contributors.md -msgid "[@lyj-jjj](https://github.com/lyj-jjj)" -msgstr "[@lyj-jjj](https://github.com/lyj-jjj)" - -#: ../../community/contributors.md -msgid "" -"[5cbd74e](https://github.com/vllm-project/vllm-" -"ascend/commit/5177bef87a21331dcca11159d3d1438075cbd74e)" -msgstr "" -"[5cbd74e](https://github.com/vllm-project/vllm-" -"ascend/commit/5177bef87a21331dcca11159d3d1438075cbd74e)" - -#: ../../community/contributors.md -msgid "72" -msgstr "72" - -#: ../../community/contributors.md -msgid "[@farawayboat](https://github.com/farawayboat)" -msgstr "[@farawayboat](https://github.com/farawayboat)" - -#: ../../community/contributors.md -msgid "2025/6/21" -msgstr "2025/6/21" - -#: ../../community/contributors.md -msgid "" -"[bc7d392](https://github.com/vllm-project/vllm-" -"ascend/commit/097e7149f75c0806774bc68207f0f6270bc7d392)" -msgstr "" -"[bc7d392](https://github.com/vllm-project/vllm-" -"ascend/commit/097e7149f75c0806774bc68207f0f6270bc7d392)" - -#: ../../community/contributors.md -msgid "71" -msgstr "71" - -#: ../../community/contributors.md -msgid "[@yuancaoyaoHW](https://github.com/yuancaoyaoHW)" -msgstr "[@yuancaoyaoHW](https://github.com/yuancaoyaoHW)" - -#: ../../community/contributors.md -msgid "2025/6/20" -msgstr "2025/6/20" - -#: ../../community/contributors.md -msgid "" -"[7aa0b94](https://github.com/vllm-project/vllm-" -"ascend/commit/00ae250f3ced68317bc91c93dc1f1a0977aa0b94)" -msgstr "" -"[7aa0b94](https://github.com/vllm-project/vllm-" -"ascend/commit/00ae250f3ced68317bc91c93dc1f1a0977aa0b94)" - -#: ../../community/contributors.md -msgid "70" -msgstr "70" - -#: 
../../community/contributors.md -msgid "[@songshanhu07](https://github.com/songshanhu07)" -msgstr "[@songshanhu07](https://github.com/songshanhu07)" - -#: ../../community/contributors.md -msgid "2025/6/18" -msgstr "2025/6/18" - -#: ../../community/contributors.md -msgid "" -"[5e1de1f](https://github.com/vllm-project/vllm-" -"ascend/commit/2a70dbbdb8f55002de3313e17dfd595e1de1f)" -msgstr "" -"[5e1de1f](https://github.com/vllm-project/vllm-" -"ascend/commit/2a70dbbdb8f55002de3313e17dfd595e1de1f)" - -#: ../../community/contributors.md -msgid "69" -msgstr "69" - -#: ../../community/contributors.md -msgid "[@wangyanhui-cmss](https://github.com/wangyanhui-cmss)" -msgstr "[@wangyanhui-cmss](https://github.com/wangyanhui-cmss)" - -#: ../../community/contributors.md -msgid "2025/6/12" -msgstr "2025/6/12" - -#: ../../community/contributors.md -msgid "" -"[40c9e88](https://github.com/vllm-project/vllm-" -"ascend/commit/2a5fb4014b863cee6abc3009f5bc5340c9e88)" -msgstr "" -"[40c9e88](https://github.com/vllm-project/vllm-" -"ascend/commit/2a5fb4014b863cee6abc3009f5bc5340c9e88)" - -#: ../../community/contributors.md -msgid "68" -msgstr "68" - -#: ../../community/contributors.md -msgid "[@chenwaner](https://github.com/chenwaner)" -msgstr "[@chenwaner](https://github.com/chenwaner)" - -#: ../../community/contributors.md -msgid "2025/6/11" -msgstr "2025/6/11" - -#: ../../community/contributors.md -msgid "" -"[c696169](https://github.com/vllm-project/vllm-" -"ascend/commit/e46dc142bf1180453c64226d76854fc1ec696169)" -msgstr "" -"[c696169](https://github.com/vllm-project/vllm-" -"ascend/commit/e46dc142bf1180453c64226d76854fc1ec696169)" - -#: ../../community/contributors.md -msgid "67" -msgstr "67" - -#: ../../community/contributors.md -msgid "[@yzim](https://github.com/yzim)" -msgstr "[@yzim](https://github.com/yzim)" - -#: ../../community/contributors.md -msgid "" -"[aaf701b](https://github.com/vllm-project/vllm-" -"ascend/commit/4153a5091b698c2270d160409e7fee73baaf701b)" -msgstr "" -"[aaf701b](https://github.com/vllm-project/vllm-" -"ascend/commit/4153a5091b698c2270d160409e7fee73baaf701b)" - -#: ../../community/contributors.md -msgid "66" -msgstr "66" - -#: ../../community/contributors.md -msgid "[@Yuxiao-Xu](https://github.com/Yuxiao-Xu)" -msgstr "[@Yuxiao-Xu](https://github.com/Yuxiao-Xu)" - -#: ../../community/contributors.md -msgid "2025/6/9" -msgstr "2025/6/9" - -#: ../../community/contributors.md -msgid "" -"[6b853f1](https://github.com/vllm-project/vllm-" -"ascend/commit/6b853f15fe69ba335d2745ebcf14a164d0bcc505)" -msgstr "" -"[6b853f1](https://github.com/vllm-project/vllm-" -"ascend/commit/6b853f15fe69ba335d2745ebcf14a164d0bcc505)" - -#: ../../community/contributors.md -msgid "65" -msgstr "65" - -#: ../../community/contributors.md -msgid "[@ChenTaoyu-SJTU](https://github.com/ChenTaoyu-SJTU)" -msgstr "[@ChenTaoyu-SJTU](https://github.com/ChenTaoyu-SJTU)" - -#: ../../community/contributors.md -msgid "2025/6/7" -msgstr "2025/6/7" - -#: ../../community/contributors.md -msgid "" -"[20dedba](https://github.com/vllm-project/vllm-" -"ascend/commit/20dedba5d1fc84b7ae8b49f9ce3e3649389e2193)" -msgstr "" -"[20dedba](https://github.com/vllm-project/vllm-" -"ascend/commit/20dedba5d1fc84b7ae8b49f9ce3e3649389e2193)" - -#: ../../community/contributors.md -msgid "64" -msgstr "64" - -#: ../../community/contributors.md -msgid "[@zxdukki](https://github.com/zxdukki)" -msgstr "[@zxdukki](https://github.com/zxdukki)" - -#: ../../community/contributors.md -msgid "" -"[87ebaef](https://github.com/vllm-project/vllm-" 
-"ascend/commit/87ebaef4e4e519988f27a6aa378f614642202ecf)" -msgstr "" -"[87ebaef](https://github.com/vllm-project/vllm-" -"ascend/commit/87ebaef4e4e519988f27a6aa378f614642202ecf)" - -#: ../../community/contributors.md -msgid "63" -msgstr "63" - -#: ../../community/contributors.md -msgid "[@sdmyzlp](https://github.com/sdmyzlp)" -msgstr "[@sdmyzlp](https://github.com/sdmyzlp)" - -#: ../../community/contributors.md -msgid "" -"[3640c60](https://github.com/vllm-project/vllm-" -"ascend/commit/3640c60b0eb4d4cb104e20bfa406d3f1d17920a7)" -msgstr "" -"[3640c60](https://github.com/vllm-project/vllm-" -"ascend/commit/3640c60b0eb4d4cb104e20bfa406d3f1d17920a7)" - -#: ../../community/contributors.md -msgid "62" -msgstr "62" - -#: ../../community/contributors.md -msgid "[@weijinqian0](https://github.com/weijinqian0)" -msgstr "[@weijinqian0](https://github.com/weijinqian0)" - -#: ../../community/contributors.md -msgid "" -"[e9ada68](https://github.com/vllm-project/vllm-" -"ascend/commit/e9ada685ece798f9fe0d4a287e3f5246a8a7207b)" -msgstr "" -"[e9ada68](https://github.com/vllm-project/vllm-" -"ascend/commit/e9ada685ece798f9fe0d4a287e3f5246a8a7207b)" - -#: ../../community/contributors.md -msgid "61" -msgstr "61" - -#: ../../community/contributors.md -msgid "[@hahazhky](https://github.com/hahazhky)" -msgstr "[@hahazhky](https://github.com/hahazhky)" - -#: ../../community/contributors.md -msgid "2025/6/6" -msgstr "2025/6/6" - -#: ../../community/contributors.md -msgid "" -"[0b12c2a](https://github.com/vllm-project/vllm-" -"ascend/commit/0b12c2acf7d9fd192beebebf662298067d9a5435)" -msgstr "" -"[0b12c2a](https://github.com/vllm-project/vllm-" -"ascend/commit/0b12c2acf7d9fd192beebebf662298067d9a5435)" - -#: ../../community/contributors.md -msgid "60" -msgstr "60" - -#: ../../community/contributors.md -msgid "[@depeng1994](https://github.com/depeng1994)" -msgstr "[@depeng1994](https://github.com/depeng1994)" - -#: ../../community/contributors.md -msgid "" -"[6b094a2](https://github.com/vllm-project/vllm-" -"ascend/commit/6b094a2bd49a8a41eb3647568b2d9e5b337db81f)" -msgstr "" -"[6b094a2](https://github.com/vllm-project/vllm-" -"ascend/commit/6b094a2bd49a8a41eb3647568b2d9e5b337db81f)" - -#: ../../community/contributors.md -msgid "59" -msgstr "59" - -#: ../../community/contributors.md -msgid "[@David9857](https://github.com/David9857)" -msgstr "[@David9857](https://github.com/David9857)" - -#: ../../community/contributors.md -msgid "2025/6/5" -msgstr "2025/6/5" - -#: ../../community/contributors.md -msgid "" -"[78431b3](https://github.com/vllm-project/vllm-" -"ascend/commit/78431b34694dfa3c8f54ed7cc626660318557927)" -msgstr "" -"[78431b3](https://github.com/vllm-project/vllm-" -"ascend/commit/78431b34694dfa3c8f54ed7cc626660318557927)" - -#: ../../community/contributors.md -msgid "58" -msgstr "58" - -#: ../../community/contributors.md -msgid "[@momo609](https://github.com/momo609)" -msgstr "[@momo609](https://github.com/momo609)" - -#: ../../community/contributors.md -msgid "" -"[908a851](https://github.com/vllm-project/vllm-" -"ascend/commit/908a851a776cfd9051cc062119e6ec481561c6f7)" -msgstr "" -"[908a851](https://github.com/vllm-project/vllm-" -"ascend/commit/908a851a776cfd9051cc062119e6ec481561c6f7)" - -#: ../../community/contributors.md -msgid "57" -msgstr "57" - -#: ../../community/contributors.md -msgid "[@zhangxinyuehfad](https://github.com/zhangxinyuehfad)" -msgstr "[@zhangxinyuehfad](https://github.com/zhangxinyuehfad)" - -#: ../../community/contributors.md -msgid "" 
-"[7737aaa](https://github.com/vllm-project/vllm-" -"ascend/commit/7737aaa40f699b233a35fb61e908b687adc1e2e5)" -msgstr "" -"[7737aaa](https://github.com/vllm-project/vllm-" -"ascend/commit/7737aaa40f699b233a35fb61e908b687adc1e2e5)" - -#: ../../community/contributors.md -msgid "56" -msgstr "56" - -#: ../../community/contributors.md -msgid "[@NINGBENZHE](https://github.com/NINGBENZHE)" -msgstr "[@NINGBENZHE](https://github.com/NINGBENZHE)" - -#: ../../community/contributors.md -msgid "2025/6/3" -msgstr "2025/6/3" - -#: ../../community/contributors.md -msgid "" -"[6ec64a3](https://github.com/vllm-project/vllm-" -"ascend/commit/6ec64a3f9686df65b5a23a41aa301e669db19099)" -msgstr "" -"[6ec64a3](https://github.com/vllm-project/vllm-" -"ascend/commit/6ec64a3f9686df65b5a23a41aa301e669db19099)" - -#: ../../community/contributors.md -msgid "55" -msgstr "55" - -#: ../../community/contributors.md -msgid "[@XWFAlone](https://github.com/XWFAlone)" -msgstr "[@XWFAlone](https://github.com/XWFAlone)" - -#: ../../community/contributors.md -msgid "2025/5/30" -msgstr "2025/5/30" - -#: ../../community/contributors.md -msgid "" -"[3442fbd](https://github.com/vllm-project/vllm-" -"ascend/commit/3442fbdb235b4c6d72c2bc64a49707a7bd89958e)" -msgstr "" -"[3442fbd](https://github.com/vllm-project/vllm-" -"ascend/commit/3442fbdb235b4c6d72c2bc64a49707a7bd89958e)" - -#: ../../community/contributors.md -msgid "54" -msgstr "54" - -#: ../../community/contributors.md -msgid "[@YisongJiang](https://github.com/YisongJiang)" -msgstr "[@YisongJiang](https://github.com/YisongJiang)" - -#: ../../community/contributors.md -msgid "2025/5/29" -msgstr "2025/5/29" - -#: ../../community/contributors.md -msgid "" -"[90afaf6](https://github.com/vllm-project/vllm-" -"ascend/commit/90afaf6306f680307462becf3c78585737579851)" -msgstr "" -"[90afaf6](https://github.com/vllm-project/vllm-" -"ascend/commit/90afaf6306f680307462becf3c78585737579851)" - -#: ../../community/contributors.md -msgid "53" -msgstr "53" - -#: ../../community/contributors.md -msgid "[@ponix-j](https://github.com/ponix-j)" -msgstr "[@ponix-j](https://github.com/ponix-j)" - -#: ../../community/contributors.md -msgid "2025/5/23" -msgstr "2025/5/23" - -#: ../../community/contributors.md -msgid "" -"[df58fb8](https://github.com/vllm-project/vllm-" -"ascend/commit/df58fb80eee24139fc61c495be3ce79cf81b3f73)" -msgstr "" -"[df58fb8](https://github.com/vllm-project/vllm-" -"ascend/commit/df58fb80eee24139fc61c495be3ce79cf81b3f73)" - -#: ../../community/contributors.md -msgid "52" -msgstr "52" - -#: ../../community/contributors.md -msgid "[@ttanzhiqiang](https://github.com/ttanzhiqiang)" -msgstr "[@ttanzhiqiang](https://github.com/ttanzhiqiang)" - -#: ../../community/contributors.md -msgid "" -"[dc6172e](https://github.com/vllm-project/vllm-" -"ascend/commit/dc6172efd3860ce95b40a7b3e93611f875f06d40)" -msgstr "" -"[dc6172e](https://github.com/vllm-project/vllm-" -"ascend/commit/dc6172efd3860ce95b40a7b3e93611f875f06d40)" - -#: ../../community/contributors.md -msgid "51" -msgstr "51" - -#: ../../community/contributors.md -msgid "[@yangpuPKU](https://github.com/yangpuPKU)" -msgstr "[@yangpuPKU](https://github.com/yangpuPKU)" - -#: ../../community/contributors.md -msgid "" -"[46df67a](https://github.com/vllm-project/vllm-" -"ascend/commit/46df67a5e9ab73fade08cbb2d8c0155cee7316d1)" -msgstr "" -"[46df67a](https://github.com/vllm-project/vllm-" -"ascend/commit/46df67a5e9ab73fade08cbb2d8c0155cee7316d1)" - -#: ../../community/contributors.md -msgid "50" -msgstr "50" - -#: 
../../community/contributors.md -msgid "[@wonderful199082](https://github.com/wonderful199082)" -msgstr "[@wonderful199082](https://github.com/wonderful199082)" - -#: ../../community/contributors.md -msgid "2025/5/20" -msgstr "2025/5/20" - -#: ../../community/contributors.md -msgid "" -"[5cf9ff1](https://github.com/vllm-project/vllm-" -"ascend/commit/5cf9ff18e91b0b7031c258d71a257b8e24689763)" -msgstr "" -"[5cf9ff1](https://github.com/vllm-project/vllm-" -"ascend/commit/5cf9ff18e91b0b7031c258d71a257b8e24689763)" - -#: ../../community/contributors.md -msgid "49" -msgstr "49" - -#: ../../community/contributors.md -msgid "[@22dimensions](https://github.com/22dimensions)" -msgstr "[@22dimensions](https://github.com/22dimensions)" - -#: ../../community/contributors.md -msgid "2025/5/17" -msgstr "2025/5/17" - -#: ../../community/contributors.md -msgid "" -"[a8730e7](https://github.com/vllm-project/vllm-" -"ascend/commit/a8730e7a3c4ac6c4b39a5946c943252fdea6cce5)" -msgstr "" -"[a8730e7](https://github.com/vllm-project/vllm-" -"ascend/commit/a8730e7a3c4ac6c4b39a5946c943252fdea6cce5)" - -#: ../../community/contributors.md -msgid "48" -msgstr "48" - -#: ../../community/contributors.md -msgid "[@cxcxflying](https://github.com/cxcxflying)" -msgstr "[@cxcxflying](https://github.com/cxcxflying)" - -#: ../../community/contributors.md -msgid "2025/5/13" -msgstr "2025/5/13" - -#: ../../community/contributors.md -msgid "" -"[e564470](https://github.com/vllm-project/vllm-" -"ascend/commit/e56447033889ca95df512208cab22ef832bfdf07)" -msgstr "" -"[e564470](https://github.com/vllm-project/vllm-" -"ascend/commit/e56447033889ca95df512208cab22ef832bfdf07)" - -#: ../../community/contributors.md -msgid "47" -msgstr "47" - -#: ../../community/contributors.md -msgid "[@NeverRaR](https://github.com/NeverRaR)" -msgstr "[@NeverRaR](https://github.com/NeverRaR)" - -#: ../../community/contributors.md -msgid "2025/5/12" -msgstr "2025/5/12" - -#: ../../community/contributors.md -msgid "" -"[efabd72](https://github.com/vllm-project/vllm-" -"ascend/commit/efabd722eb757e49aa309c173bbec91ca8c4ced1)" -msgstr "" -"[efabd72](https://github.com/vllm-project/vllm-" -"ascend/commit/efabd722eb757e49aa309c173bbec91ca8c4ced1)" - -#: ../../community/contributors.md -msgid "46" -msgstr "46" - -#: ../../community/contributors.md -msgid "[@chris668899](https://github.com/chris668899)" -msgstr "[@chris668899](https://github.com/chris668899)" - -#: ../../community/contributors.md -msgid "2025/5/8" -msgstr "2025/5/8" - -#: ../../community/contributors.md -msgid "" -"[6c02088](https://github.com/vllm-project/vllm-" -"ascend/commit/6c020883a8332b5c519f4f6502733edd9b391c2b)" -msgstr "" -"[6c02088](https://github.com/vllm-project/vllm-" -"ascend/commit/6c020883a8332b5c519f4f6502733edd9b391c2b)" - -#: ../../community/contributors.md -msgid "45" -msgstr "45" - -#: ../../community/contributors.md -msgid "[@sunbaosong](https://github.com/sunbaosong)" -msgstr "[@sunbaosong](https://github.com/sunbaosong)" - -#: ../../community/contributors.md -msgid "2025/5/6" -msgstr "2025/5/6" - -#: ../../community/contributors.md -msgid "" -"[d6bfae8](https://github.com/vllm-project/vllm-" -"ascend/commit/d6bfae8eeebedf677b643b712d367a3a69c9cce4)" -msgstr "" -"[d6bfae8](https://github.com/vllm-project/vllm-" -"ascend/commit/d6bfae8eeebedf677b643b712d367a3a69c9cce4)" - -#: ../../community/contributors.md -msgid "44" -msgstr "44" - -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "[@ApsarasX](https://github.com/ApsarasX)" msgstr 
"[@ApsarasX](https://github.com/ApsarasX)" -#: ../../community/contributors.md -msgid "2025/4/29" -msgstr "2025/4/29" +#: ../../source/community/contributors.md +msgid "2025/08" +msgstr "2025/08" -#: ../../community/contributors.md -msgid "" -"[87975fa](https://github.com/vllm-project/vllm-" -"ascend/commit/87975fa058fe3f90d204ded42a08989a8dcb413e)" -msgstr "" -"[87975fa](https://github.com/vllm-project/vllm-" -"ascend/commit/87975fa058fe3f90d204ded42a08989a8dcb413e)" +#: ../../source/community/contributors.md +msgid "Mengqing Cao" +msgstr "Mengqing Cao" -#: ../../community/contributors.md -msgid "43" -msgstr "43" - -#: ../../community/contributors.md -msgid "[@zouyida2052](https://github.com/zouyida2052)" -msgstr "[@zouyida2052](https://github.com/zouyida2052)" - -#: ../../community/contributors.md -msgid "2025/4/28" -msgstr "2025/4/28" - -#: ../../community/contributors.md -msgid "" -"[b9528e6](https://github.com/vllm-project/vllm-" -"ascend/commit/b9528e6ecdc417cf444e55a0ce4a2bafdef0ea3b)" -msgstr "" -"[b9528e6](https://github.com/vllm-project/vllm-" -"ascend/commit/b9528e6ecdc417cf444e55a0ce4a2bafdef0ea3b)" - -#: ../../community/contributors.md -msgid "42" -msgstr "42" - -#: ../../community/contributors.md -msgid "[@ZhengJun9](https://github.com/ZhengJun9)" -msgstr "[@ZhengJun9](https://github.com/ZhengJun9)" - -#: ../../community/contributors.md -msgid "" -"[1791113](https://github.com/vllm-project/vllm-" -"ascend/commit/17911138c90d78a76bd691e9dcb56763db35b19f)" -msgstr "" -"[1791113](https://github.com/vllm-project/vllm-" -"ascend/commit/17911138c90d78a76bd691e9dcb56763db35b19f)" - -#: ../../community/contributors.md -msgid "41" -msgstr "41" - -#: ../../community/contributors.md -msgid "[@linfeng-yuan](https://github.com/linfeng-yuan)" -msgstr "[@linfeng-yuan](https://github.com/linfeng-yuan)" - -#: ../../community/contributors.md -msgid "" -"[2204e4d](https://github.com/vllm-project/vllm-" -"ascend/commit/2204e4d08f8e10cf9c30154a14eaa5ca956c2acd)" -msgstr "" -"[2204e4d](https://github.com/vllm-project/vllm-" -"ascend/commit/2204e4d08f8e10cf9c30154a14eaa5ca956c2acd)" - -#: ../../community/contributors.md -msgid "40" -msgstr "40" - -#: ../../community/contributors.md -msgid "2025/4/27" -msgstr "2025/4/27" - -#: ../../community/contributors.md -msgid "" -"[fa4a5d9](https://github.com/vllm-project/vllm-" -"ascend/commit/fa4a5d980e8845a88b9162cf169f0a5ab230f8a5)" -msgstr "" -"[fa4a5d9](https://github.com/vllm-project/vllm-" -"ascend/commit/fa4a5d980e8845a88b9162cf169f0a5ab230f8a5)" - -#: ../../community/contributors.md -msgid "39" -msgstr "39" - -#: ../../community/contributors.md -msgid "[@fakeYan](https://github.com/fakeYan)" -msgstr "[@fakeYan](https://github.com/fakeYan)" - -#: ../../community/contributors.md -msgid "2025/4/23" -msgstr "2025/4/23" - -#: ../../community/contributors.md -msgid "" -"[05bdcbe](https://github.com/vllm-project/vllm-" -"ascend/commit/05bdcbeae47c7fcb9b1c30cad059abf1d40b5421)" -msgstr "" -"[05bdcbe](https://github.com/vllm-project/vllm-" -"ascend/commit/05bdcbeae47c7fcb9b1c30cad059abf1d40b5421)" - -#: ../../community/contributors.md -msgid "38" -msgstr "38" - -#: ../../community/contributors.md -msgid "[@RongRongStudio](https://github.com/RongRongStudio)" -msgstr "[@RongRongStudio](https://github.com/RongRongStudio)" - -#: ../../community/contributors.md -msgid "2025/4/22" -msgstr "2025/4/22" - -#: ../../community/contributors.md -msgid "" -"[848e041](https://github.com/vllm-project/vllm-" -"ascend/commit/848e041a54732c923660dd02daf8e9bf439736a2)" -msgstr 
"" -"[848e041](https://github.com/vllm-project/vllm-" -"ascend/commit/848e041a54732c923660dd02daf8e9bf439736a2)" - -#: ../../community/contributors.md -msgid "37" -msgstr "37" - -#: ../../community/contributors.md -msgid "[@paulyu12](https://github.com/paulyu12)" -msgstr "[@paulyu12](https://github.com/paulyu12)" - -#: ../../community/contributors.md -msgid "2025/4/17" -msgstr "2025/4/17" - -#: ../../community/contributors.md -msgid "" -"[697908f](https://github.com/vllm-project/vllm-" -"ascend/commit/697908f5cd7c65a3a917ec1a962b0886efc98c7e)" -msgstr "" -"[697908f](https://github.com/vllm-project/vllm-" -"ascend/commit/697908f5cd7c65a3a917ec1a962b0886efc98c7e)" - -#: ../../community/contributors.md -msgid "36" -msgstr "36" - -#: ../../community/contributors.md -msgid "[@heartStrive1998](https://github.com/heartStrive1998)" -msgstr "[@heartStrive1998](https://github.com/heartStrive1998)" - -#: ../../community/contributors.md -msgid "2025/4/16" -msgstr "2025/4/16" - -#: ../../community/contributors.md -msgid "" -"[2f15503](https://github.com/vllm-project/vllm-" -"ascend/commit/2f155039dc3997640854daef469bbf0cb77dc6ed)" -msgstr "" -"[2f15503](https://github.com/vllm-project/vllm-" -"ascend/commit/2f155039dc3997640854daef469bbf0cb77dc6ed)" - -#: ../../community/contributors.md -msgid "35" -msgstr "35" - -#: ../../community/contributors.md -msgid "[@eeethenQ](https://github.com/eeethenQ)" -msgstr "[@eeethenQ](https://github.com/eeethenQ)" - -#: ../../community/contributors.md -msgid "2025/4/15" -msgstr "2025/4/15" - -#: ../../community/contributors.md -msgid "" -"[44a8301](https://github.com/vllm-project/vllm-" -"ascend/commit/44a8301424ded94dae83e13b837f5bfc0a1bfc15)" -msgstr "" -"[44a8301](https://github.com/vllm-project/vllm-" -"ascend/commit/44a8301424ded94dae83e13b837f5bfc0a1bfc15)" - -#: ../../community/contributors.md -msgid "34" -msgstr "34" - -#: ../../community/contributors.md -msgid "[@wxsIcey](https://github.com/wxsIcey)" -msgstr "[@wxsIcey](https://github.com/wxsIcey)" - -#: ../../community/contributors.md -msgid "2025/4/10" -msgstr "2025/4/10" - -#: ../../community/contributors.md -msgid "" -"[d05ea17](https://github.com/vllm-project/vllm-" -"ascend/commit/d05ea17427b82a506b97409a7de8359f18f565f7)" -msgstr "" -"[d05ea17](https://github.com/vllm-project/vllm-" -"ascend/commit/d05ea17427b82a506b97409a7de8359f18f565f7)" - -#: ../../community/contributors.md -msgid "33" -msgstr "33" - -#: ../../community/contributors.md -msgid "[@yx0716](https://github.com/yx0716)" -msgstr "[@yx0716](https://github.com/yx0716)" - -#: ../../community/contributors.md -msgid "2025/4/8" -msgstr "2025/4/8" - -#: ../../community/contributors.md -msgid "" -"[5d62393](https://github.com/vllm-project/vllm-" -"ascend/commit/5d6239306be9b0f5ac6dbaa137048c372a92ff20)" -msgstr "" -"[5d62393](https://github.com/vllm-project/vllm-" -"ascend/commit/5d6239306be9b0f5ac6dbaa137048c372a92ff20)" - -#: ../../community/contributors.md -msgid "32" -msgstr "32" - -#: ../../community/contributors.md -msgid "[@celestialli](https://github.com/celestialli)" -msgstr "[@celestialli](https://github.com/celestialli)" - -#: ../../community/contributors.md -msgid "2025/4/7" -msgstr "2025/4/7" - -#: ../../community/contributors.md -msgid "" -"[2b765dc](https://github.com/vllm-project/vllm-" -"ascend/commit/2b765dcc4974b1bafc26ff5da817ce7e652f0eb0)" -msgstr "" -"[2b765dc](https://github.com/vllm-project/vllm-" -"ascend/commit/2b765dcc4974b1bafc26ff5da817ce7e652f0eb0)" - -#: ../../community/contributors.md -msgid "31" -msgstr "31" - -#: 
../../community/contributors.md -msgid "[@hfadzxy](https://github.com/hfadzxy)" -msgstr "[@hfadzxy](https://github.com/hfadzxy)" - -#: ../../community/contributors.md -msgid "2025/3/30" -msgstr "2025/3/30" - -#: ../../community/contributors.md -msgid "" -"[7beb433](https://github.com/vllm-project/vllm-" -"ascend/commit/7beb4339dc8047af9ef64db1d0a8c59ddbb3709f)" -msgstr "" -"[7beb433](https://github.com/vllm-project/vllm-" -"ascend/commit/7beb4339dc8047af9ef64db1d0a8c59ddbb3709f)" - -#: ../../community/contributors.md -msgid "30" -msgstr "30" - -#: ../../community/contributors.md -msgid "[@wuhuikx](https://github.com/wuhuikx)" -msgstr "[@wuhuikx](https://github.com/wuhuikx)" - -#: ../../community/contributors.md -msgid "2025/3/28" -msgstr "2025/3/28" - -#: ../../community/contributors.md -msgid "" -"[57a84bb](https://github.com/vllm-project/vllm-" -"ascend/commit/57a84bb7befeaa0dc62aa35fa406e4d6affbfcca)" -msgstr "" -"[57a84bb](https://github.com/vllm-project/vllm-" -"ascend/commit/57a84bb7befeaa0dc62aa35fa406e4d6affbfcca)" - -#: ../../community/contributors.md -msgid "29" -msgstr "29" - -#: ../../community/contributors.md -msgid "[@zzzzwwjj](https://github.com/zzzzwwjj)" -msgstr "[@zzzzwwjj](https://github.com/zzzzwwjj)" - -#: ../../community/contributors.md -msgid "" -"[12390af](https://github.com/vllm-project/vllm-" -"ascend/commit/12390af075962456ecc8233d8dcce7064b75f390)" -msgstr "" -"[12390af](https://github.com/vllm-project/vllm-" -"ascend/commit/12390af075962456ecc8233d8dcce7064b75f390)" - -#: ../../community/contributors.md -msgid "28" -msgstr "28" - -#: ../../community/contributors.md -msgid "" -"[27e86b9](https://github.com/vllm-project/vllm-" -"ascend/commit/27e86b993a6a810d818143ec9dbfc439a419fa77)" -msgstr "" -"[27e86b9](https://github.com/vllm-project/vllm-" -"ascend/commit/27e86b993a6a810d818143ec9dbfc439a419fa77)" - -#: ../../community/contributors.md -msgid "27" -msgstr "27" - -#: ../../community/contributors.md -msgid "[@ZhengZhenyu](https://github.com/ZhengZhenyu)" -msgstr "[@ZhengZhenyu](https://github.com/ZhengZhenyu)" - -#: ../../community/contributors.md -msgid "2025/3/26" -msgstr "2025/3/26" - -#: ../../community/contributors.md -msgid "" -"[0b5a964](https://github.com/vllm-project/vllm-" -"ascend/commit/0b5a9643fd6c3240d7ede669e37209d7ff433841)" -msgstr "" -"[0b5a964](https://github.com/vllm-project/vllm-" -"ascend/commit/0b5a9643fd6c3240d7ede669e37209d7ff433841)" - -#: ../../community/contributors.md -msgid "26" -msgstr "26" - -#: ../../community/contributors.md -msgid "[@baifanxxx](https://github.com/baifanxxx)" -msgstr "[@baifanxxx](https://github.com/baifanxxx)" - -#: ../../community/contributors.md -msgid "" -"[1225052](https://github.com/vllm-project/vllm-" -"ascend/commit/122505208ff6284f409846ca7294f4a4b9883285)" -msgstr "" -"[1225052](https://github.com/vllm-project/vllm-" -"ascend/commit/122505208ff6284f409846ca7294f4a4b9883285)" - -#: ../../community/contributors.md -msgid "25" -msgstr "25" - -#: ../../community/contributors.md -msgid "[@rjg-lyh](https://github.com/rjg-lyh)" -msgstr "[@rjg-lyh](https://github.com/rjg-lyh)" - -#: ../../community/contributors.md -msgid "2025/3/13" -msgstr "2025/3/13" - -#: ../../community/contributors.md -msgid "" -"[6512470](https://github.com/vllm-project/vllm-" -"ascend/commit/65124705fb39d4cc2c94c80254421e067a82fe50)" -msgstr "" -"[6512470](https://github.com/vllm-project/vllm-" -"ascend/commit/65124705fb39d4cc2c94c80254421e067a82fe50)" - -#: ../../community/contributors.md -msgid "24" -msgstr "24" - -#: 
../../community/contributors.md -msgid "[@xiemingda-1002](https://github.com/xiemingda-1002)" -msgstr "[@xiemingda-1002](https://github.com/xiemingda-1002)" - -#: ../../community/contributors.md -msgid "2025/3/12" -msgstr "2025/3/12" - -#: ../../community/contributors.md -msgid "" -"[59ea23d](https://github.com/vllm-project/vllm-" -"ascend/commit/59ea23d0d394879d7f33de6fd22242539b9c3cc5)" -msgstr "" -"[59ea23d](https://github.com/vllm-project/vllm-" -"ascend/commit/59ea23d0d394879d7f33de6fd22242539b9c3cc5)" - -#: ../../community/contributors.md -msgid "23" -msgstr "23" - -#: ../../community/contributors.md -msgid "[@yiz-liu](https://github.com/yiz-liu)" -msgstr "[@yiz-liu](https://github.com/yiz-liu)" - -#: ../../community/contributors.md -msgid "2025/3/11" -msgstr "2025/3/11" - -#: ../../community/contributors.md -msgid "" -"[0db6670](https://github.com/vllm-project/vllm-" -"ascend/commit/0db6670bfab8cb1d84c9e7270df0a1d42d6ce7ca)" -msgstr "" -"[0db6670](https://github.com/vllm-project/vllm-" -"ascend/commit/0db6670bfab8cb1d84c9e7270df0a1d42d6ce7ca)" - -#: ../../community/contributors.md -msgid "22" -msgstr "22" - -#: ../../community/contributors.md -msgid "[@new-TonyWang](https://github.com/new-TonyWang)" -msgstr "[@new-TonyWang](https://github.com/new-TonyWang)" - -#: ../../community/contributors.md -msgid "" -"[dfb4e23](https://github.com/vllm-project/vllm-" -"ascend/commit/dfb4e23e9d820ac992a071c123bbe983c7b01b2e)" -msgstr "" -"[dfb4e23](https://github.com/vllm-project/vllm-" -"ascend/commit/dfb4e23e9d820ac992a071c123bbe983c7b01b2e)" - -#: ../../community/contributors.md -msgid "21" -msgstr "21" - -#: ../../community/contributors.md -msgid "[@mengwei805](https://github.com/mengwei805)" -msgstr "[@mengwei805](https://github.com/mengwei805)" - -#: ../../community/contributors.md -msgid "2025/3/6" -msgstr "2025/3/6" - -#: ../../community/contributors.md -msgid "" -"[8fcf3d1](https://github.com/vllm-project/vllm-" -"ascend/commit/8fcf3d1704084626db35c5dc82ade446508598d4)" -msgstr "" -"[8fcf3d1](https://github.com/vllm-project/vllm-" -"ascend/commit/8fcf3d1704084626db35c5dc82ade446508598d4)" - -#: ../../community/contributors.md -msgid "20" -msgstr "20" - -#: ../../community/contributors.md -msgid "[@baymax591](https://github.com/baymax591)" -msgstr "[@baymax591](https://github.com/baymax591)" - -#: ../../community/contributors.md -msgid "2025/2/28" -msgstr "2025/2/28" - -#: ../../community/contributors.md -msgid "" -"[e8131b9](https://github.com/vllm-project/vllm-" -"ascend/commit/e8131b99cf199f50a304e6e6fb125a1b95bcc92b)" -msgstr "" -"[e8131b9](https://github.com/vllm-project/vllm-" -"ascend/commit/e8131b99cf199f50a304e6e6fb125a1b95bcc92b)" - -#: ../../community/contributors.md -msgid "19" -msgstr "19" - -#: ../../community/contributors.md -msgid "[@dependabot](https://github.com/dependabot)" -msgstr "[@dependabot](https://github.com/dependabot)" - -#: ../../community/contributors.md -msgid "2025/2/27" -msgstr "2025/2/27" - -#: ../../community/contributors.md -msgid "" -"[a5564ed](https://github.com/vllm-project/vllm-" -"ascend/commit/a5564ed5d8fd9818936a22d9ea35951a27513b4c)" -msgstr "" -"[a5564ed](https://github.com/vllm-project/vllm-" -"ascend/commit/a5564ed5d8fd9818936a22d9ea35951a27513b4c)" - -#: ../../community/contributors.md -msgid "18" -msgstr "18" - -#: ../../community/contributors.md -msgid "[@shink](https://github.com/shink)" -msgstr "[@shink](https://github.com/shink)" - -#: ../../community/contributors.md -msgid "" -"[6aed833](https://github.com/vllm-project/vllm-" 
-"ascend/commit/6aed83335cbe92fd0b8ef07c28966a753d012ccb)" -msgstr "" -"[6aed833](https://github.com/vllm-project/vllm-" -"ascend/commit/6aed83335cbe92fd0b8ef07c28966a753d012ccb)" - -#: ../../community/contributors.md -msgid "17" -msgstr "17" - -#: ../../community/contributors.md -msgid "[@wwfu109](https://github.com/wwfu109)" -msgstr "[@wwfu109](https://github.com/wwfu109)" - -#: ../../community/contributors.md -msgid "" -"[b074047](https://github.com/vllm-project/vllm-" -"ascend/commit/b07404766bdaf6e3cebc5cb0aba89a247501302e)" -msgstr "" -"[b074047](https://github.com/vllm-project/vllm-" -"ascend/commit/b07404766bdaf6e3cebc5cb0aba89a247501302e)" - -#: ../../community/contributors.md -msgid "16" -msgstr "16" - -#: ../../community/contributors.md -msgid "[@kunpengW-code](https://github.com/kunpengW-code)" -msgstr "[@kunpengW-code](https://github.com/kunpengW-code)" - -#: ../../community/contributors.md -msgid "2025/2/26" -msgstr "2025/2/26" - -#: ../../community/contributors.md -msgid "" -"[ca807ce](https://github.com/vllm-project/vllm-" -"ascend/commit/ca807ce49ed64aa89242f5ae29b9862a77648b45)" -msgstr "" -"[ca807ce](https://github.com/vllm-project/vllm-" -"ascend/commit/ca807ce49ed64aa89242f5ae29b9862a77648b45)" - -#: ../../community/contributors.md -msgid "15" -msgstr "15" - -#: ../../community/contributors.md -msgid "[@Yaphets24](https://github.com/Yaphets24)" -msgstr "[@Yaphets24](https://github.com/Yaphets24)" - -#: ../../community/contributors.md -msgid "2025/2/22" -msgstr "2025/2/22" - -#: ../../community/contributors.md -msgid "" -"[d0b3cb4](https://github.com/vllm-project/vllm-" -"ascend/commit/d0b3cb4fa79d5fc7f8245a3c68885ce1fa030ba4)" -msgstr "" -"[d0b3cb4](https://github.com/vllm-project/vllm-" -"ascend/commit/d0b3cb4fa79d5fc7f8245a3c68885ce1fa030ba4)" - -#: ../../community/contributors.md -msgid "14" -msgstr "14" - -#: ../../community/contributors.md -msgid "[@noemotiovon](https://github.com/noemotiovon)" -msgstr "[@noemotiovon](https://github.com/noemotiovon)" - -#: ../../community/contributors.md -msgid "2025/2/21" -msgstr "2025/2/21" - -#: ../../community/contributors.md -msgid "" -"[202b39a](https://github.com/vllm-project/vllm-" -"ascend/commit/202b39a38c2869b0ecc3df486550fb555a2eb0c0)" -msgstr "" -"[202b39a](https://github.com/vllm-project/vllm-" -"ascend/commit/202b39a38c2869b0ecc3df486550fb555a2eb0c0)" - -#: ../../community/contributors.md -msgid "13" -msgstr "13" - -#: ../../community/contributors.md -msgid "[@SidaoY](https://github.com/SidaoY)" -msgstr "[@SidaoY](https://github.com/SidaoY)" - -#: ../../community/contributors.md -msgid "2025/2/18" -msgstr "2025/2/18" - -#: ../../community/contributors.md -msgid "" -"[718c763](https://github.com/vllm-project/vllm-" -"ascend/commit/718c7638555d12cd43ea2a9e497e185778b68595)" -msgstr "" -"[718c763](https://github.com/vllm-project/vllm-" -"ascend/commit/718c7638555d12cd43ea2a9e497e185778b68595)" - -#: ../../community/contributors.md -msgid "12" -msgstr "12" - -#: ../../community/contributors.md -msgid "[@ShiyaNiu](https://github.com/ShiyaNiu)" -msgstr "[@ShiyaNiu](https://github.com/ShiyaNiu)" - -#: ../../community/contributors.md -msgid "2025/2/17" -msgstr "2025/2/17" - -#: ../../community/contributors.md -msgid "" -"[36ea38f](https://github.com/vllm-project/vllm-" -"ascend/commit/36ea38fde56437ff1745bd95cd8d9e02a6578d38)" -msgstr "" -"[36ea38f](https://github.com/vllm-project/vllm-" -"ascend/commit/36ea38fde56437ff1745bd95cd8d9e02a6578d38)" - -#: ../../community/contributors.md -msgid "11" -msgstr "11" - -#: 
../../community/contributors.md -msgid "[@ji-huazhong](https://github.com/ji-huazhong)" -msgstr "[@ji-huazhong](https://github.com/ji-huazhong)" - -#: ../../community/contributors.md -msgid "2025/2/12" -msgstr "2025/2/12" - -#: ../../community/contributors.md -msgid "" -"[c8b57d1](https://github.com/vllm-project/vllm-" -"ascend/commit/c8b57d10b24efcd9b4fadeb66cfbf66aa3dd5f82)" -msgstr "" -"[c8b57d1](https://github.com/vllm-project/vllm-" -"ascend/commit/c8b57d10b24efcd9b4fadeb66cfbf66aa3dd5f82)" - -#: ../../community/contributors.md -msgid "10" -msgstr "10" - -#: ../../community/contributors.md -msgid "[@Angazenn](https://github.com/Angazenn)" -msgstr "[@Angazenn](https://github.com/Angazenn)" - -#: ../../community/contributors.md -msgid "2025/2/11" -msgstr "2025/2/11" - -#: ../../community/contributors.md -msgid "" -"[7637759](https://github.com/vllm-project/vllm-" -"ascend/commit/7637759056028839c74960d9cfd3ce6275ee5d35)" -msgstr "" -"[7637759](https://github.com/vllm-project/vllm-" -"ascend/commit/7637759056028839c74960d9cfd3ce6275ee5d35)" - -#: ../../community/contributors.md -msgid "9" -msgstr "9" - -#: ../../community/contributors.md -msgid "[@whx-sjtu](https://github.com/whx-sjtu)" -msgstr "[@whx-sjtu](https://github.com/whx-sjtu)" - -#: ../../community/contributors.md -msgid "2025/2/7" -msgstr "2025/2/7" - -#: ../../community/contributors.md -msgid "" -"[8fc5dc9](https://github.com/vllm-project/vllm-" -"ascend/commit/8fc5dc966aaf4e174d1ec0d1902c40289411ec0e)" -msgstr "" -"[8fc5dc9](https://github.com/vllm-project/vllm-" -"ascend/commit/8fc5dc966aaf4e174d1ec0d1902c40289411ec0e)" - -#: ../../community/contributors.md -msgid "8" -msgstr "8" - -#: ../../community/contributors.md -msgid "[@zouyida2002](https://github.com/zouyida2002)" -msgstr "[@zouyida2002](https://github.com/zouyida2002)" - -#: ../../community/contributors.md -msgid "" -"[4495fc6](https://github.com/vllm-project/vllm-" -"ascend/commit/4495fc68389e3fb1ef14534c202948931e38446b)" -msgstr "" -"[4495fc6](https://github.com/vllm-project/vllm-" -"ascend/commit/4495fc68389e3fb1ef14534c202948931e38446b)" - -#: ../../community/contributors.md -msgid "7" -msgstr "7" - -#: ../../community/contributors.md -msgid "[@hw_whx](https://github.com/hw_whx)" -msgstr "[@hw_whx](https://github.com/hw_whx)" - -#: ../../community/contributors.md -msgid "" -"[7d16772](https://github.com/vllm-project/vllm-" -"ascend/commit/7d1677263bc6628ade33bb780455e0f6e5b9b27a)" -msgstr "" -"[7d16772](https://github.com/vllm-project/vllm-" -"ascend/commit/7d1677263bc6628ade33bb780455e0f6e5b9b27a)" - -#: ../../community/contributors.md -msgid "6" -msgstr "6" - -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "[@MengqingCao](https://github.com/MengqingCao)" msgstr "[@MengqingCao](https://github.com/MengqingCao)" -#: ../../community/contributors.md -msgid "2025/2/6" -msgstr "2025/2/6" +#: ../../source/community/contributors.md +msgid "Peng Yu" +msgstr "Peng Yu" -#: ../../community/contributors.md +#: ../../source/community/contributors.md +msgid "[@paulyu12](https://github.com/paulyu12)" +msgstr "[@paulyu12](https://github.com/paulyu12)" + +#: ../../source/community/contributors.md +msgid "2025/10" +msgstr "2025/10" + +#: ../../source/community/contributors.md +msgid "Yizhou Liu" +msgstr "Yizhou Liu" + +#: ../../source/community/contributors.md +msgid "[@yiz-liu](https://github.com/yiz-liu)" +msgstr "[@yiz-liu](https://github.com/yiz-liu)" + +#: ../../source/community/contributors.md +msgid "Jinqian Wei" +msgstr "Jinqian Wei" 
+ +#: ../../source/community/contributors.md +msgid "[@weijinqian0](https://github.com/weijinqian0)" +msgstr "[@weijinqian0](https://github.com/weijinqian0)" + +#: ../../source/community/contributors.md +msgid "Chuanyu Qin" +msgstr "Chuanyu Qin" + +#: ../../source/community/contributors.md +msgid "[@nalinaly](https://github.com/nalinaly)" +msgstr "[@nalinaly](https://github.com/nalinaly)" + +#: ../../source/community/contributors.md +msgid "Jie Wen" +msgstr "Jie Wen" + +#: ../../source/community/contributors.md +msgid "[@zzzzwwjj](https://github.com/zzzzwwjj)" +msgstr "[@zzzzwwjj](https://github.com/zzzzwwjj)" + +#: ../../source/community/contributors.md +msgid "2025/12" +msgstr "2025/12" + +#: ../../source/community/contributors.md +msgid "Chao Lei" +msgstr "Chao Lei" + +#: ../../source/community/contributors.md +msgid "[@LCAIZJ](https://github.com/LCAIZJ)" +msgstr "[@LCAIZJ](https://github.com/LCAIZJ)" + +#: ../../source/community/contributors.md +msgid "JiaXu Liu" +msgstr "JiaXu Liu" + +#: ../../source/community/contributors.md +msgid "[@realliujiaxu](https://github.com/realliujiaxu)" +msgstr "[@realliujiaxu](https://github.com/realliujiaxu)" + +#: ../../source/community/contributors.md +msgid "HeXiang Wang" +msgstr "HeXiang Wang" + +#: ../../source/community/contributors.md +msgid "[@whx-sjtu](https://github.com/whx-sjtu)" +msgstr "[@whx-sjtu](https://github.com/whx-sjtu)" + +#: ../../source/community/contributors.md +msgid "2026/01" +msgstr "2026/01" + +#: ../../source/community/contributors.md:22 +msgid "Contributors" +msgstr "贡献者" + +#: ../../source/community/contributors.md:25 msgid "" -"[7d9ae22](https://github.com/vllm-project/vllm-" -"ascend/commit/7d9ae22ecb6dc3ea4e720e5109cf46e1ae7da730)" -msgstr "" -"[7d9ae22](https://github.com/vllm-project/vllm-" -"ascend/commit/7d9ae22ecb6dc3ea4e720e5109cf46e1ae7da730)" +"Every release of vLLM Ascend would not have been possible without the " +"following contributors:" +msgstr "vLLM Ascend 的每一次发布都离不开以下贡献者:" -#: ../../community/contributors.md +#: ../../source/community/contributors.md:27 +msgid "Updated on 2026-03-09:" +msgstr "更新于 2026-03-09:" + +#: ../../source/community/contributors.md +msgid "Number" +msgstr "编号" + +#: ../../source/community/contributors.md +msgid "Contributor" +msgstr "贡献者" + +#: ../../source/community/contributors.md +msgid "Commit ID" +msgstr "提交 ID" + +#: ../../source/community/contributors.md +msgid "345" +msgstr "345" + +#: ../../source/community/contributors.md +msgid "[@chenxi-hh](https://github.com/chenxi-hh)" +msgstr "[@chenxi-hh](https://github.com/chenxi-hh)" + +#: ../../source/community/contributors.md +msgid "2026/03/09" +msgstr "2026/03/09" + +#: ../../source/community/contributors.md +msgid "" +"[737dfcf](https://github.com/vllm-project/vllm-" +"ascend/commit/737dfcf638eae71d6c24c340dee20ff205f21ed9)" +msgstr "" +"[737dfcf](https://github.com/vllm-project/vllm-" +"ascend/commit/737dfcf638eae71d6c24c340dee20ff205f21ed9)" + +#: ../../source/community/contributors.md +msgid "344" +msgstr "344" + +#: ../../source/community/contributors.md +msgid "[@xiaocongtou6](https://github.com/xiaocongtou6)" +msgstr "[@xiaocongtou6](https://github.com/xiaocongtou6)" + +#: ../../source/community/contributors.md +msgid "2026/03/06" +msgstr "2026/03/06" + +#: ../../source/community/contributors.md +msgid "" +"[bc0fd7c](https://github.com/vllm-project/vllm-" +"ascend/commit/bc0fd7ca7217498d5faa91504b0e8c3f822a5cc6)" +msgstr "" +"[bc0fd7c](https://github.com/vllm-project/vllm-" 
+"ascend/commit/bc0fd7ca7217498d5faa91504b0e8c3f822a5cc6)" + +#: ../../source/community/contributors.md +msgid "343" +msgstr "343" + +#: ../../source/community/contributors.md +msgid "[@wanghengkang](https://github.com/wanghengkang)" +msgstr "[@wanghengkang](https://github.com/wanghengkang)" + +#: ../../source/community/contributors.md +msgid "" +"[c49ce18](https://github.com/vllm-project/vllm-" +"ascend/commit/c49ce18ea544970510ebb04fff49a484533fe2a3)" +msgstr "" +"[c49ce18](https://github.com/vllm-project/vllm-" +"ascend/commit/c49ce18ea544970510ebb04fff49a484533fe2a3)" + +#: ../../source/community/contributors.md +msgid "342" +msgstr "342" + +#: ../../source/community/contributors.md +msgid "[@Mind-s](https://github.com/Mind-s)" +msgstr "[@Mind-s](https://github.com/Mind-s)" + +#: ../../source/community/contributors.md +msgid "" +"[0f812dc](https://github.com/vllm-project/vllm-" +"ascend/commit/0f812dcc58514d4e01b683282fd5a22cbbc86036)" +msgstr "" +"[0f812dc](https://github.com/vllm-project/vllm-" +"ascend/commit/0f812dcc58514d4e01b683282fd5a22cbbc86036)" + +#: ../../source/community/contributors.md +msgid "341" +msgstr "341" + +#: ../../source/community/contributors.md +msgid "[@guleo](https://github.com/guleo)" +msgstr "[@guleo](https://github.com/guleo)" + +#: ../../source/community/contributors.md +msgid "" +"[18b52af](https://github.com/vllm-project/vllm-" +"ascend/commit/18b52afe2bb2e3ac80e988a2788ac1e3bbdb898a)" +msgstr "" +"[18b52af](https://github.com/vllm-project/vllm-" +"ascend/commit/18b52afe2bb2e3ac80e988a2788ac1e3bbdb898a)" + +#: ../../source/community/contributors.md +msgid "340" +msgstr "340" + +#: ../../source/community/contributors.md +msgid "[@songjianquan](https://github.com/songjianquan)" +msgstr "[@songjianquan](https://github.com/songjianquan)" + +#: ../../source/community/contributors.md +msgid "2026/03/05" +msgstr "2026/03/05" + +#: ../../source/community/contributors.md +msgid "" +"[43c8da3](https://github.com/vllm-project/vllm-" +"ascend/commit/43c8da3574c96b9aaeaf4ef360c9b4aaf6a3e305)" +msgstr "" +"[43c8da3](https://github.com/vllm-project/vllm-" +"ascend/commit/43c8da3574c96b9aaeaf4ef360c9b4aaf6a3e305)" + +#: ../../source/community/contributors.md +msgid "339" +msgstr "339" + +#: ../../source/community/contributors.md +msgid "[@liuchen2026fly](https://github.com/liuchen2026fly)" +msgstr "[@liuchen2026fly](https://github.com/liuchen2026fly)" + +#: ../../source/community/contributors.md +msgid "" +"[640ecd1](https://github.com/vllm-project/vllm-" +"ascend/commit/640ecd1b772b1c3dcdc57336b762cc02d011eba8)" +msgstr "" +"[640ecd1](https://github.com/vllm-project/vllm-" +"ascend/commit/640ecd1b772b1c3dcdc57336b762cc02d011eba8)" + +#: ../../source/community/contributors.md +msgid "338" +msgstr "338" + +#: ../../source/community/contributors.md +msgid "[@Zhujiyang2](https://github.com/Zhujiyang2)" +msgstr "[@Zhujiyang2](https://github.com/Zhujiyang2)" + +#: ../../source/community/contributors.md +msgid "2026/03/04" +msgstr "2026/03/04" + +#: ../../source/community/contributors.md +msgid "" +"[c3c2656](https://github.com/vllm-project/vllm-" +"ascend/commit/c3c265648f6fb3bf9ea2f6c0e43a4a2e67973d40)" +msgstr "" +"[c3c2656](https://github.com/vllm-project/vllm-" +"ascend/commit/c3c265648f6fb3bf9ea2f6c0e43a4a2e67973d40)" + +#: ../../source/community/contributors.md +msgid "337" +msgstr "337" + +#: ../../source/community/contributors.md +msgid "[@NJX-njx](https://github.com/NJX-njx)" +msgstr "[@NJX-njx](https://github.com/NJX-njx)" + +#: 
../../source/community/contributors.md +msgid "" +"[c7fd7a2](https://github.com/vllm-project/vllm-" +"ascend/commit/c7fd7a25f7f874dc34149f4c235c952dc7c3e227)" +msgstr "" +"[c7fd7a2](https://github.com/vllm-project/vllm-" +"ascend/commit/c7fd7a25f7f874dc34149f4c235c952dc7c3e227)" + +#: ../../source/community/contributors.md +msgid "336" +msgstr "336" + +#: ../../source/community/contributors.md +msgid "[@tanhaoan333](https://github.com/tanhaoan333)" +msgstr "[@tanhaoan333](https://github.com/tanhaoan333)" + +#: ../../source/community/contributors.md +msgid "2026/03/03" +msgstr "2026/03/03" + +#: ../../source/community/contributors.md +msgid "" +"[15f6564](https://github.com/vllm-project/vllm-" +"ascend/commit/15f65649762992b17e10bda2d7f942618a10dfbd)" +msgstr "" +"[15f6564](https://github.com/vllm-project/vllm-" +"ascend/commit/15f65649762992b17e10bda2d7f942618a10dfbd)" + +#: ../../source/community/contributors.md +msgid "335" +msgstr "335" + +#: ../../source/community/contributors.md +msgid "[@Eric-dot](https://github.com/Eric-dot)" +msgstr "[@Eric-dot](https://github.com/Eric-dot)" + +#: ../../source/community/contributors.md +msgid "2026/03/02" +msgstr "2026/03/02" + +#: ../../source/community/contributors.md +msgid "" +"[3c66a97](https://github.com/vllm-project/vllm-" +"ascend/commit/3c66a970f29a4a976bbb4836e333bc58995b4a9d)" +msgstr "" +"[3c66a97](https://github.com/vllm-project/vllm-" +"ascend/commit/3c66a970f29a4a976bbb4836e333bc58995b4a9d)" + +#: ../../source/community/contributors.md +msgid "334" +msgstr "334" + +#: ../../source/community/contributors.md +msgid "[@wangbj127](https://github.com/wangbj127)" +msgstr "[@wangbj127](https://github.com/wangbj127)" + +#: ../../source/community/contributors.md +msgid "2026/02/26" +msgstr "2026/02/26" + +#: ../../source/community/contributors.md +msgid "" +"[169e434](https://github.com/vllm-project/vllm-" +"ascend/commit/169e434f78f03b963efb2779d8c64675313f9481)" +msgstr "" +"[169e434](https://github.com/vllm-project/vllm-" +"ascend/commit/169e434f78f03b963efb2779d8c64675313f9481)" + +#: ../../source/community/contributors.md +msgid "333" +msgstr "333" + +#: ../../source/community/contributors.md +msgid "[@Li-Yongwen](https://github.com/Li-Yongwen)" +msgstr "[@Li-Yongwen](https://github.com/Li-Yongwen)" + +#: ../../source/community/contributors.md +msgid "" +"[2870f7c](https://github.com/vllm-project/vllm-" +"ascend/commit/2870f7c8ad20754f5cc09cc5ea25044ffb3c6515)" +msgstr "" +"[2870f7c](https://github.com/vllm-project/vllm-" +"ascend/commit/2870f7c8ad20754f5cc09cc5ea25044ffb3c6515)" + +#: ../../source/community/contributors.md +msgid "332" +msgstr "332" + +#: ../../source/community/contributors.md +msgid "[@LoganJane](https://github.com/LoganJane)" +msgstr "[@LoganJane](https://github.com/LoganJane)" + +#: ../../source/community/contributors.md +msgid "2026/02/25" +msgstr "2026/02/25" + +#: ../../source/community/contributors.md +msgid "" +"[ed05173](https://github.com/vllm-project/vllm-" +"ascend/commit/ed051737e9984216c93cbea8790710c3485b3cee)" +msgstr "" +"[ed05173](https://github.com/vllm-project/vllm-" +"ascend/commit/ed051737e9984216c93cbea8790710c3485b3cee)" + +#: ../../source/community/contributors.md +msgid "331" +msgstr "331" + +#: ../../source/community/contributors.md +msgid "[@chenchuw886](https://github.com/chenchuw886)" +msgstr "[@chenchuw886](https://github.com/chenchuw886)" + +#: ../../source/community/contributors.md +msgid "" +"[3da2ba2](https://github.com/vllm-project/vllm-" 
+"ascend/commit/3da2ba22ebeef10ed31782488edb8120e3935bf7)" +msgstr "" +"[3da2ba2](https://github.com/vllm-project/vllm-" +"ascend/commit/3da2ba22ebeef10ed31782488edb8120e3935bf7)" + +#: ../../source/community/contributors.md +msgid "330" +msgstr "330" + +#: ../../source/community/contributors.md +msgid "[@Bowen-Leee](https://github.com/Bowen-Leee)" +msgstr "[@Bowen-Leee](https://github.com/Bowen-Leee)" + +#: ../../source/community/contributors.md +msgid "" +"[e3927cc](https://github.com/vllm-project/vllm-" +"ascend/commit/e3927cc8f57ee9bdc58db1d0365dfb6af9375b02)" +msgstr "" +"[e3927cc](https://github.com/vllm-project/vllm-" +"ascend/commit/e3927cc8f57ee9bdc58db1d0365dfb6af9375b02)" + +#: ../../source/community/contributors.md +msgid "329" +msgstr "329" + +#: ../../source/community/contributors.md +msgid "[@lijiahang226](https://github.com/lijiahang226)" +msgstr "[@lijiahang226](https://github.com/lijiahang226)" + +#: ../../source/community/contributors.md +msgid "2026/02/24" +msgstr "2026/02/24" + +#: ../../source/community/contributors.md +msgid "" +"[ff43537](https://github.com/vllm-project/vllm-" +"ascend/commit/ff4353772783470579254f5aa01d05c08117e981)" +msgstr "" +"[ff43537](https://github.com/vllm-project/vllm-" +"ascend/commit/ff4353772783470579254f5aa01d05c08117e981)" + +#: ../../source/community/contributors.md +msgid "328" +msgstr "328" + +#: ../../source/community/contributors.md +msgid "[@Spicy-Stick](https://github.com/Spicy-Stick)" +msgstr "[@Spicy-Stick](https://github.com/Spicy-Stick)" + +#: ../../source/community/contributors.md +msgid "2026/02/14" +msgstr "2026/02/14" + +#: ../../source/community/contributors.md +msgid "" +"[64aea60](https://github.com/vllm-project/vllm-" +"ascend/commit/64aea60f2e0256e6cdfd505cca9243972bcccc7c)" +msgstr "" +"[64aea60](https://github.com/vllm-project/vllm-" +"ascend/commit/64aea60f2e0256e6cdfd505cca9243972bcccc7c)" + +#: ../../source/community/contributors.md +msgid "327" +msgstr "327" + +#: ../../source/community/contributors.md +msgid "[@yejj710](https://github.com/yejj710)" +msgstr "[@yejj710](https://github.com/yejj710)" + +#: ../../source/community/contributors.md +msgid "2026/02/12" +msgstr "2026/02/12" + +#: ../../source/community/contributors.md +msgid "" +"[8b23554](https://github.com/vllm-project/vllm-" +"ascend/commit/8b2355474147374bd1abdb6832510b91ef591b33)" +msgstr "" +"[8b23554](https://github.com/vllm-project/vllm-" +"ascend/commit/8b2355474147374bd1abdb6832510b91ef591b33)" + +#: ../../source/community/contributors.md +msgid "326" +msgstr "326" + +#: ../../source/community/contributors.md +msgid "[@taoyao1221](https://github.com/taoyao1221)" +msgstr "[@taoyao1221](https://github.com/taoyao1221)" + +#: ../../source/community/contributors.md +msgid "" +"[41d056f](https://github.com/vllm-project/vllm-" +"ascend/commit/41d056f94716d2ac74b73f26547a109c099e82dd)" +msgstr "" +"[41d056f](https://github.com/vllm-project/vllm-" +"ascend/commit/41d056f94716d2ac74b73f26547a109c099e82dd)" + +#: ../../source/community/contributors.md +msgid "325" +msgstr "325" + +#: ../../source/community/contributors.md +msgid "[@nakairika](https://github.com/nakairika)" +msgstr "[@nakairika](https://github.com/nakairika)" + +#: ../../source/community/contributors.md +msgid "" +"[b86ea66](https://github.com/vllm-project/vllm-" +"ascend/commit/b86ea66b0a2de3795609804b3f93538563f6dc8d)" +msgstr "" +"[b86ea66](https://github.com/vllm-project/vllm-" +"ascend/commit/b86ea66b0a2de3795609804b3f93538563f6dc8d)" + +#: ../../source/community/contributors.md 
+msgid "324" +msgstr "324" + +#: ../../source/community/contributors.md +msgid "[@mikequan0425](https://github.com/mikequan0425)" +msgstr "[@mikequan0425](https://github.com/mikequan0425)" + +#: ../../source/community/contributors.md +msgid "" +"[7221045](https://github.com/vllm-project/vllm-" +"ascend/commit/7221045777bd4c3d77037fcb20d9bddd7b4dba3c)" +msgstr "" +"[7221045](https://github.com/vllm-project/vllm-" +"ascend/commit/7221045777bd4c3d77037fcb20d9bddd7b4dba3c)" + +#: ../../source/community/contributors.md +msgid "323" +msgstr "323" + +#: ../../source/community/contributors.md +msgid "[@lih827](https://github.com/lih827)" +msgstr "[@lih827](https://github.com/lih827)" + +#: ../../source/community/contributors.md +msgid "" +"[f718120](https://github.com/vllm-project/vllm-" +"ascend/commit/f71812011d49013ca077cc2441412593b7356052)" +msgstr "" +"[f718120](https://github.com/vllm-project/vllm-" +"ascend/commit/f71812011d49013ca077cc2441412593b7356052)" + +#: ../../source/community/contributors.md +msgid "322" +msgstr "322" + +#: ../../source/community/contributors.md +msgid "[@huyq](https://github.com/huyq)" +msgstr "[@huyq](https://github.com/huyq)" + +#: ../../source/community/contributors.md +msgid "" +"[a0315f6](https://github.com/vllm-project/vllm-" +"ascend/commit/a0315f66972d64cd586cb2ef29d8484f51e3194a)" +msgstr "" +"[a0315f6](https://github.com/vllm-project/vllm-" +"ascend/commit/a0315f66972d64cd586cb2ef29d8484f51e3194a)" + +#: ../../source/community/contributors.md +msgid "321" +msgstr "321" + +#: ../../source/community/contributors.md +msgid "[@yydyzr](https://github.com/yydyzr)" +msgstr "[@yydyzr](https://github.com/yydyzr)" + +#: ../../source/community/contributors.md +msgid "2026/02/11" +msgstr "2026/02/11" + +#: ../../source/community/contributors.md +msgid "" +"[ff3a50d](https://github.com/vllm-project/vllm-" +"ascend/commit/ff3a50d011dcbea08f87ebed69ff1bf156dbb01e)" +msgstr "" +"[ff3a50d](https://github.com/vllm-project/vllm-" +"ascend/commit/ff3a50d011dcbea08f87ebed69ff1bf156dbb01e)" + +#: ../../source/community/contributors.md +msgid "320" +msgstr "320" + +#: ../../source/community/contributors.md +msgid "[@luomin2005](https://github.com/luomin2005)" +msgstr "[@luomin2005](https://github.com/luomin2005)" + +#: ../../source/community/contributors.md +msgid "" +"[0c1cfa2](https://github.com/vllm-project/vllm-" +"ascend/commit/0c1cfa2baca623ae8822d934c5941aac4efd57c4)" +msgstr "" +"[0c1cfa2](https://github.com/vllm-project/vllm-" +"ascend/commit/0c1cfa2baca623ae8822d934c5941aac4efd57c4)" + +#: ../../source/community/contributors.md +msgid "319" +msgstr "319" + +#: ../../source/community/contributors.md +msgid "[@GoCHug](https://github.com/GoCHug)" +msgstr "[@GoCHug](https://github.com/GoCHug)" + +#: ../../source/community/contributors.md +msgid "2026/02/09" +msgstr "2026/02/09" + +#: ../../source/community/contributors.md +msgid "" +"[80e5812](https://github.com/vllm-project/vllm-" +"ascend/commit/80e5812b39bd3b19efa8a6af65658f599bd6cae0)" +msgstr "" +"[80e5812](https://github.com/vllm-project/vllm-" +"ascend/commit/80e5812b39bd3b19efa8a6af65658f599bd6cae0)" + +#: ../../source/community/contributors.md +msgid "318" +msgstr "318" + +#: ../../source/community/contributors.md +msgid "[@acat-rw](https://github.com/acat-rw)" +msgstr "[@acat-rw](https://github.com/acat-rw)" + +#: ../../source/community/contributors.md +msgid "2026/02/05" +msgstr "2026/02/05" + +#: ../../source/community/contributors.md +msgid "" +"[8e66299](https://github.com/vllm-project/vllm-" 
+"ascend/commit/8e66299bf16a7b48cf85b080dc3f8927144d5add)" +msgstr "" +"[8e66299](https://github.com/vllm-project/vllm-" +"ascend/commit/8e66299bf16a7b48cf85b080dc3f8927144d5add)" + +#: ../../source/community/contributors.md +msgid "317" +msgstr "317" + +#: ../../source/community/contributors.md +msgid "[@Zhang-Bryan](https://github.com/Zhang-Bryan)" +msgstr "[@Zhang-Bryan](https://github.com/Zhang-Bryan)" + +#: ../../source/community/contributors.md +msgid "2026/02/04" +msgstr "2026/02/04" + +#: ../../source/community/contributors.md +msgid "" +"[804a9ec](https://github.com/vllm-project/vllm-" +"ascend/commit/804a9ec4e6365aa484e3f9db61e226598b406ee6)" +msgstr "" +"[804a9ec](https://github.com/vllm-project/vllm-" +"ascend/commit/804a9ec4e6365aa484e3f9db61e226598b406ee6)" + +#: ../../source/community/contributors.md +msgid "316" +msgstr "316" + +#: ../../source/community/contributors.md +msgid "[@IWantFight](https://github.com/IWantFight)" +msgstr "[@IWantFight](https://github.com/IWantFight)" + +#: ../../source/community/contributors.md +msgid "" +"[e7a13be](https://github.com/vllm-project/vllm-" +"ascend/commit/e7a13beedb74b39dffd9f9dd1dd47bed32360206)" +msgstr "" +"[e7a13be](https://github.com/vllm-project/vllm-" +"ascend/commit/e7a13beedb74b39dffd9f9dd1dd47bed32360206)" + +#: ../../source/community/contributors.md +msgid "315" +msgstr "315" + +#: ../../source/community/contributors.md +msgid "[@SkychenLee](https://github.com/SkychenLee)" +msgstr "[@SkychenLee](https://github.com/SkychenLee)" + +#: ../../source/community/contributors.md +msgid "2026/02/02" +msgstr "2026/02/02" + +#: ../../source/community/contributors.md +msgid "" +"[dc225e1](https://github.com/vllm-project/vllm-" +"ascend/commit/dc225e1bb00d0f66100cbd83550b826308162f62)" +msgstr "" +"[dc225e1](https://github.com/vllm-project/vllm-" +"ascend/commit/dc225e1bb00d0f66100cbd83550b826308162f62)" + +#: ../../source/community/contributors.md +msgid "314" +msgstr "314" + +#: ../../source/community/contributors.md +msgid "[@LQLlulu](https://github.com/LQLlulu)" +msgstr "[@LQLlulu](https://github.com/LQLlulu)" + +#: ../../source/community/contributors.md +msgid "" +"[cb1212f](https://github.com/vllm-project/vllm-" +"ascend/commit/cb1212f18eae5611c60523f1bd8ab5ecb2a431a1)" +msgstr "" +"[cb1212f](https://github.com/vllm-project/vllm-" +"ascend/commit/cb1212f18eae5611c60523f1bd8ab5ecb2a431a1)" + +#: ../../source/community/contributors.md +msgid "313" +msgstr "313" + +#: ../../source/community/contributors.md +msgid "[@huangazazaz](https://github.com/huangazazaz)" +msgstr "[@huangazazaz](https://github.com/huangazazaz)" + +#: ../../source/community/contributors.md +msgid "" +"[c1618a0](https://github.com/vllm-project/vllm-" +"ascend/commit/c1618a04273e967616e40551a86f370e7a76222b)" +msgstr "" +"[c1618a0](https://github.com/vllm-project/vllm-" +"ascend/commit/c1618a04273e967616e40551a86f370e7a76222b)" + +#: ../../source/community/contributors.md +msgid "312" +msgstr "312" + +#: ../../source/community/contributors.md +msgid "[@wxh571001500](https://github.com/wxh571001500)" +msgstr "[@wxh571001500](https://github.com/wxh571001500)" + +#: ../../source/community/contributors.md +msgid "2026/01/30" +msgstr "2026/01/30" + +#: ../../source/community/contributors.md +msgid "" +"[47f5d4b](https://github.com/vllm-project/vllm-" +"ascend/commit/47f5d4b941c8e23821f4e98d0a5e470e6bcb13e3)" +msgstr "" +"[47f5d4b](https://github.com/vllm-project/vllm-" +"ascend/commit/47f5d4b941c8e23821f4e98d0a5e470e6bcb13e3)" + +#: 
../../source/community/contributors.md +msgid "311" +msgstr "311" + +#: ../../source/community/contributors.md +msgid "[@wubin58](https://github.com/wubin58)" +msgstr "[@wubin58](https://github.com/wubin58)" + +#: ../../source/community/contributors.md +msgid "" +"[4230bc8](https://github.com/vllm-project/vllm-" +"ascend/commit/4230bc86461e4b879e0b434eaa11cfbd2bf83ef7)" +msgstr "" +"[4230bc8](https://github.com/vllm-project/vllm-" +"ascend/commit/4230bc86461e4b879e0b434eaa11cfbd2bf83ef7)" + +#: ../../source/community/contributors.md +msgid "310" +msgstr "310" + +#: ../../source/community/contributors.md +msgid "[@serlar](https://github.com/serlar)" +msgstr "[@serlar](https://github.com/serlar)" + +#: ../../source/community/contributors.md +msgid "" +"[77ea873](https://github.com/vllm-project/vllm-" +"ascend/commit/77ea8732241bfd80160824b17d1aee75909e1c24)" +msgstr "" +"[77ea873](https://github.com/vllm-project/vllm-" +"ascend/commit/77ea8732241bfd80160824b17d1aee75909e1c24)" + +#: ../../source/community/contributors.md +msgid "309" +msgstr "309" + +#: ../../source/community/contributors.md +msgid "[@Sergey-Zlobin](https://github.com/Sergey-Zlobin)" +msgstr "[@Sergey-Zlobin](https://github.com/Sergey-Zlobin)" + +#: ../../source/community/contributors.md +msgid "2026/01/29" +msgstr "2026/01/29" + +#: ../../source/community/contributors.md +msgid "" +"[6a7b3bc](https://github.com/vllm-project/vllm-" +"ascend/commit/6a7b3bc29cdd4c0bc25c87396b7567bb46c58607)" +msgstr "" +"[6a7b3bc](https://github.com/vllm-project/vllm-" +"ascend/commit/6a7b3bc29cdd4c0bc25c87396b7567bb46c58607)" + +#: ../../source/community/contributors.md +msgid "308" +msgstr "308" + +#: ../../source/community/contributors.md +msgid "[@HengjiaL](https://github.com/HengjiaL)" +msgstr "[@HengjiaL](https://github.com/HengjiaL)" + +#: ../../source/community/contributors.md +msgid "2026/01/28" +msgstr "2026/01/28" + +#: ../../source/community/contributors.md +msgid "" +"[c1e88a5](https://github.com/vllm-project/vllm-" +"ascend/commit/c1e88a55e8e2812eab4714be1f042eaa3eaf9927)" +msgstr "" +"[c1e88a5](https://github.com/vllm-project/vllm-" +"ascend/commit/c1e88a55e8e2812eab4714be1f042eaa3eaf9927)" + +#: ../../source/community/contributors.md +msgid "307" +msgstr "307" + +#: ../../source/community/contributors.md +msgid "[@pu-zhe](https://github.com/pu-zhe)" +msgstr "[@pu-zhe](https://github.com/pu-zhe)" + +#: ../../source/community/contributors.md +msgid "2026/01/27" +msgstr "2026/01/27" + +#: ../../source/community/contributors.md +msgid "" +"[57fd6e4](https://github.com/vllm-project/vllm-" +"ascend/commit/57fd6e4bd9c8025c9d5192b74d71e476b794ddba)" +msgstr "" +"[57fd6e4](https://github.com/vllm-project/vllm-" +"ascend/commit/57fd6e4bd9c8025c9d5192b74d71e476b794ddba)" + +#: ../../source/community/contributors.md +msgid "306" +msgstr "306" + +#: ../../source/community/contributors.md +msgid "[@mengchengTang](https://github.com/mengchengTang)" +msgstr "[@mengchengTang](https://github.com/mengchengTang)" + +#: ../../source/community/contributors.md +msgid "305" +msgstr "305" + +#: ../../source/community/contributors.md +msgid "[@huangfeifei1995](https://github.com/huangfeifei1995)" +msgstr "[@huangfeifei1995](https://github.com/huangfeifei1995)" + +#: ../../source/community/contributors.md +msgid "2026/01/26" +msgstr "2026/01/26" + +#: ../../source/community/contributors.md +msgid "" +"[ce11fd4](https://github.com/vllm-project/vllm-" +"ascend/commit/ce11fd49f3f636e7d1ceea8787683c0a0ca60f24)" +msgstr "" 
+"[ce11fd4](https://github.com/vllm-project/vllm-" +"ascend/commit/ce11fd49f3f636e7d1ceea8787683c0a0ca60f24)" + +#: ../../source/community/contributors.md +msgid "304" +msgstr "304" + +#: ../../source/community/contributors.md +msgid "[@gjc0824](https://github.com/gjc0824)" +msgstr "[@gjc0824](https://github.com/gjc0824)" + +#: ../../source/community/contributors.md +msgid "" +"[b390e0e](https://github.com/vllm-project/vllm-" +"ascend/commit/b390e0ef78afb31d5ee26fdd26e4f33e89d645b3)" +msgstr "" +"[b390e0e](https://github.com/vllm-project/vllm-" +"ascend/commit/b390e0ef78afb31d5ee26fdd26e4f33e89d645b3)" + +#: ../../source/community/contributors.md +msgid "303" +msgstr "303" + +#: ../../source/community/contributors.md +msgid "[@Agent-Skill-007](https://github.com/Agent-Skill-007)" +msgstr "[@Agent-Skill-007](https://github.com/Agent-Skill-007)" + +#: ../../source/community/contributors.md +msgid "2026/01/24" +msgstr "2026/01/24" + +#: ../../source/community/contributors.md +msgid "" +"[8129c42](https://github.com/vllm-project/vllm-" +"ascend/commit/8129c429efa232e38a839654efeb9d708adb66c0)" +msgstr "" +"[8129c42](https://github.com/vllm-project/vllm-" +"ascend/commit/8129c429efa232e38a839654efeb9d708adb66c0)" + +#: ../../source/community/contributors.md +msgid "302" +msgstr "302" + +#: ../../source/community/contributors.md +msgid "[@yjmyl](https://github.com/yjmyl)" +msgstr "[@yjmyl](https://github.com/yjmyl)" + +#: ../../source/community/contributors.md +msgid "2026/01/23" +msgstr "2026/01/23" + +#: ../../source/community/contributors.md +msgid "" +"[e90b141](https://github.com/vllm-project/vllm-" +"ascend/commit/e90b14140b5ff2e05fc8c5b812eedea51ffc3ee5)" +msgstr "" +"[e90b141](https://github.com/vllm-project/vllm-" +"ascend/commit/e90b14140b5ff2e05fc8c5b812eedea51ffc3ee5)" + +#: ../../source/community/contributors.md +msgid "301" +msgstr "301" + +#: ../../source/community/contributors.md +msgid "[@simplzyu](https://github.com/simplzyu)" +msgstr "[@simplzyu](https://github.com/simplzyu)" + +#: ../../source/community/contributors.md +msgid "" +"[f8d03d2](https://github.com/vllm-project/vllm-" +"ascend/commit/f8d03d21f1fc94cfe14cd1d9430621624ecad76d)" +msgstr "" +"[f8d03d2](https://github.com/vllm-project/vllm-" +"ascend/commit/f8d03d21f1fc94cfe14cd1d9430621624ecad76d)" + +#: ../../source/community/contributors.md +msgid "300" +msgstr "300" + +#: ../../source/community/contributors.md +msgid "[@maxmgrdv](https://github.com/maxmgrdv)" +msgstr "[@maxmgrdv](https://github.com/maxmgrdv)" + +#: ../../source/community/contributors.md +msgid "2026/01/22" +msgstr "2026/01/22" + +#: ../../source/community/contributors.md +msgid "" +"[ef9d836](https://github.com/vllm-project/vllm-" +"ascend/commit/ef9d8367f5c7d62aeebc3eb790487afca5b49b88)" +msgstr "" +"[ef9d836](https://github.com/vllm-project/vllm-" +"ascend/commit/ef9d8367f5c7d62aeebc3eb790487afca5b49b88)" + +#: ../../source/community/contributors.md +msgid "299" +msgstr "299" + +#: ../../source/community/contributors.md +msgid "[@guanguan0308](https://github.com/guanguan0308)" +msgstr "[@guanguan0308](https://github.com/guanguan0308)" + +#: ../../source/community/contributors.md +msgid "2026/01/21" +msgstr "2026/01/21" + +#: ../../source/community/contributors.md +msgid "" +"[1ed9524](https://github.com/vllm-project/vllm-" +"ascend/commit/1ed9524763590e5ba509c0cd07c80113123e7bf6)" +msgstr "" +"[1ed9524](https://github.com/vllm-project/vllm-" +"ascend/commit/1ed9524763590e5ba509c0cd07c80113123e7bf6)" + +#: ../../source/community/contributors.md 
+msgid "298" +msgstr "298" + +#: ../../source/community/contributors.md +msgid "[@aipaes](https://github.com/aipaes)" +msgstr "[@aipaes](https://github.com/aipaes)" + +#: ../../source/community/contributors.md +msgid "2026/01/19" +msgstr "2026/01/19" + +#: ../../source/community/contributors.md +msgid "" +"[f58e110](https://github.com/vllm-project/vllm-" +"ascend/commit/f58e110afe65653c50f909f701e8ce7fec20054a)" +msgstr "" +"[f58e110](https://github.com/vllm-project/vllm-" +"ascend/commit/f58e110afe65653c50f909f701e8ce7fec20054a)" + +#: ../../source/community/contributors.md +msgid "297" +msgstr "297" + +#: ../../source/community/contributors.md +msgid "[@Tflowers-0129](https://github.com/Tflowers-0129)" +msgstr "[@Tflowers-0129](https://github.com/Tflowers-0129)" + +#: ../../source/community/contributors.md +msgid "2026/01/17" +msgstr "2026/01/17" + +#: ../../source/community/contributors.md +msgid "" +"[1ffca86](https://github.com/vllm-project/vllm-" +"ascend/commit/1ffca8673fa460174b891a194560e4fd05eb651d)" +msgstr "" +"[1ffca86](https://github.com/vllm-project/vllm-" +"ascend/commit/1ffca8673fa460174b891a194560e4fd05eb651d)" + +#: ../../source/community/contributors.md +msgid "296" +msgstr "296" + +#: ../../source/community/contributors.md +msgid "[@ichaoren](https://github.com/ichaoren)" +msgstr "[@ichaoren](https://github.com/ichaoren)" + +#: ../../source/community/contributors.md +msgid "" +"[d17370b](https://github.com/vllm-project/vllm-" +"ascend/commit/d17370b9233241fd1b771fbbebe59d0d46c23f93)" +msgstr "" +"[d17370b](https://github.com/vllm-project/vllm-" +"ascend/commit/d17370b9233241fd1b771fbbebe59d0d46c23f93)" + +#: ../../source/community/contributors.md +msgid "295" +msgstr "295" + +#: ../../source/community/contributors.md +msgid "[@brandneway](https://github.com/brandneway)" +msgstr "[@brandneway](https://github.com/brandneway)" + +#: ../../source/community/contributors.md +msgid "2026/01/16" +msgstr "2026/01/16" + +#: ../../source/community/contributors.md +msgid "" +"[d426276](https://github.com/vllm-project/vllm-" +"ascend/commit/d426276b4b9f65c952cfeb4e37022829e8b66dd8)" +msgstr "" +"[d426276](https://github.com/vllm-project/vllm-" +"ascend/commit/d426276b4b9f65c952cfeb4e37022829e8b66dd8)" + +#: ../../source/community/contributors.md +msgid "294" +msgstr "294" + +#: ../../source/community/contributors.md +msgid "[@Rozwel-dx](https://github.com/Rozwel-dx)" +msgstr "[@Rozwel-dx](https://github.com/Rozwel-dx)" + +#: ../../source/community/contributors.md +msgid "2026/01/13" +msgstr "2026/01/13" + +#: ../../source/community/contributors.md +msgid "" +"[8d57128](https://github.com/vllm-project/vllm-" +"ascend/commit/8d571286dd223f7ebb58b9aae155d55c57b1a5b6)" +msgstr "" +"[8d57128](https://github.com/vllm-project/vllm-" +"ascend/commit/8d571286dd223f7ebb58b9aae155d55c57b1a5b6)" + +#: ../../source/community/contributors.md +msgid "293" +msgstr "293" + +#: ../../source/community/contributors.md +msgid "[@LiuYi-Up](https://github.com/LiuYi-Up)" +msgstr "[@LiuYi-Up](https://github.com/LiuYi-Up)" + +#: ../../source/community/contributors.md +msgid "" +"[dde547e](https://github.com/vllm-project/vllm-" +"ascend/commit/dde547e900e5263e046be665c7664af13a5c7f75)" +msgstr "" +"[dde547e](https://github.com/vllm-project/vllm-" +"ascend/commit/dde547e900e5263e046be665c7664af13a5c7f75)" + +#: ../../source/community/contributors.md +msgid "292" +msgstr "292" + +#: ../../source/community/contributors.md +msgid "[@nomewang](https://github.com/nomewang)" +msgstr 
"[@nomewang](https://github.com/nomewang)" + +#: ../../source/community/contributors.md +msgid "2026/01/12" +msgstr "2026/01/12" + +#: ../../source/community/contributors.md +msgid "" +"[348cdf9](https://github.com/vllm-project/vllm-" +"ascend/commit/348cdf98aad7ae9b399bf8481fcf2bb3baa6a636)" +msgstr "" +"[348cdf9](https://github.com/vllm-project/vllm-" +"ascend/commit/348cdf98aad7ae9b399bf8481fcf2bb3baa6a636)" + +#: ../../source/community/contributors.md +msgid "291" +msgstr "291" + +#: ../../source/community/contributors.md +msgid "[@Jeaniowang](https://github.com/Jeaniowang)" +msgstr "[@Jeaniowang](https://github.com/Jeaniowang)" + +#: ../../source/community/contributors.md +msgid "" +"[4453c60](https://github.com/vllm-project/vllm-" +"ascend/commit/4453c602626c6bce50b376bbb6e803d7b0131a6e)" +msgstr "" +"[4453c60](https://github.com/vllm-project/vllm-" +"ascend/commit/4453c602626c6bce50b376bbb6e803d7b0131a6e)" + +#: ../../source/community/contributors.md +msgid "290" +msgstr "290" + +#: ../../source/community/contributors.md +msgid "[@gh924](https://github.com/gh924)" +msgstr "[@gh924](https://github.com/gh924)" + +#: ../../source/community/contributors.md +msgid "2026/01/11" +msgstr "2026/01/11" + +#: ../../source/community/contributors.md +msgid "" +"[6880c1b](https://github.com/vllm-project/vllm-" +"ascend/commit/6880c1b3834ba24b12f5537e50dcac7c7ca1fa92)" +msgstr "" +"[6880c1b](https://github.com/vllm-project/vllm-" +"ascend/commit/6880c1b3834ba24b12f5537e50dcac7c7ca1fa92)" + +#: ../../source/community/contributors.md +msgid "289" +msgstr "289" + +#: ../../source/community/contributors.md +msgid "[@Feng-xiaosuo](https://github.com/Feng-xiaosuo)" +msgstr "[@Feng-xiaosuo](https://github.com/Feng-xiaosuo)" + +#: ../../source/community/contributors.md +msgid "2026/01/10" +msgstr "2026/01/10" + +#: ../../source/community/contributors.md +msgid "" +"[c316679](https://github.com/vllm-project/vllm-" +"ascend/commit/c316679e658205313c0a547b8ae4365382e3a685)" +msgstr "" +"[c316679](https://github.com/vllm-project/vllm-" +"ascend/commit/c316679e658205313c0a547b8ae4365382e3a685)" + +#: ../../source/community/contributors.md +msgid "288" +msgstr "288" + +#: ../../source/community/contributors.md +msgid "[@zyz111222](https://github.com/zyz111222)" +msgstr "[@zyz111222](https://github.com/zyz111222)" + +#: ../../source/community/contributors.md +msgid "2026/01/09" +msgstr "2026/01/09" + +#: ../../source/community/contributors.md +msgid "" +"[98c788a](https://github.com/vllm-project/vllm-" +"ascend/commit/98c788a65ae7bdc982b8f5088bcefc4f4c716945)" +msgstr "" +"[98c788a](https://github.com/vllm-project/vllm-" +"ascend/commit/98c788a65ae7bdc982b8f5088bcefc4f4c716945)" + +#: ../../source/community/contributors.md +msgid "287" +msgstr "287" + +#: ../../source/community/contributors.md +msgid "[@wwwumr](https://github.com/wwwumr)" +msgstr "[@wwwumr](https://github.com/wwwumr)" + +#: ../../source/community/contributors.md +msgid "" +"[3ce5a34](https://github.com/vllm-project/vllm-" +"ascend/commit/3ce5a34468e92512670759f7ee0aae0defa4ae94)" +msgstr "" +"[3ce5a34](https://github.com/vllm-project/vllm-" +"ascend/commit/3ce5a34468e92512670759f7ee0aae0defa4ae94)" + +#: ../../source/community/contributors.md +msgid "286" +msgstr "286" + +#: ../../source/community/contributors.md +msgid "[@lhchg](https://github.com/lhchg)" +msgstr "[@lhchg](https://github.com/lhchg)" + +#: ../../source/community/contributors.md +msgid "" +"[dc99cfd](https://github.com/vllm-project/vllm-" 
+"ascend/commit/dc99cfdc15bfb094983e49ec14e4eaa98dee7a7a)" +msgstr "" +"[dc99cfd](https://github.com/vllm-project/vllm-" +"ascend/commit/dc99cfdc15bfb094983e49ec14e4eaa98dee7a7a)" + +#: ../../source/community/contributors.md +msgid "285" +msgstr "285" + +#: ../../source/community/contributors.md +msgid "[@icerain-alt](https://github.com/icerain-alt)" +msgstr "[@icerain-alt](https://github.com/icerain-alt)" + +#: ../../source/community/contributors.md +msgid "" +"[09682e0](https://github.com/vllm-project/vllm-" +"ascend/commit/09682e075118aaacb0a717f2b7078bad040599a9)" +msgstr "" +"[09682e0](https://github.com/vllm-project/vllm-" +"ascend/commit/09682e075118aaacb0a717f2b7078bad040599a9)" + +#: ../../source/community/contributors.md +msgid "284" +msgstr "284" + +#: ../../source/community/contributors.md +msgid "[@wangyibo1005](https://github.com/wangyibo1005)" +msgstr "[@wangyibo1005](https://github.com/wangyibo1005)" + +#: ../../source/community/contributors.md +msgid "2026/01/07" +msgstr "2026/01/07" + +#: ../../source/community/contributors.md +msgid "" +"[25baf6d](https://github.com/vllm-project/vllm-" +"ascend/commit/25baf6df095f10c6af29fcafd56ad3ff3e83a9b2)" +msgstr "" +"[25baf6d](https://github.com/vllm-project/vllm-" +"ascend/commit/25baf6df095f10c6af29fcafd56ad3ff3e83a9b2)" + +#: ../../source/community/contributors.md +msgid "283" +msgstr "283" + +#: ../../source/community/contributors.md +msgid "[@starmountain1997](https://github.com/starmountain1997)" +msgstr "[@starmountain1997](https://github.com/starmountain1997)" + +#: ../../source/community/contributors.md +msgid "" +"[086c093](https://github.com/vllm-project/vllm-" +"ascend/commit/086c093347118d0448df6ac7e80a7b4dd1cdc4f8)" +msgstr "" +"[086c093](https://github.com/vllm-project/vllm-" +"ascend/commit/086c093347118d0448df6ac7e80a7b4dd1cdc4f8)" + +#: ../../source/community/contributors.md +msgid "282" +msgstr "282" + +#: ../../source/community/contributors.md +msgid "[@pacoxu](https://github.com/pacoxu)" +msgstr "[@pacoxu](https://github.com/pacoxu)" + +#: ../../source/community/contributors.md +msgid "" +"[4f98080](https://github.com/vllm-project/vllm-" +"ascend/commit/4f9808002b1bc0f1b75762d430983940ca7f4875)" +msgstr "" +"[4f98080](https://github.com/vllm-project/vllm-" +"ascend/commit/4f9808002b1bc0f1b75762d430983940ca7f4875)" + +#: ../../source/community/contributors.md +msgid "281" +msgstr "281" + +#: ../../source/community/contributors.md +msgid "[@zhangguinan](https://github.com/zhangguinan)" +msgstr "[@zhangguinan](https://github.com/zhangguinan)" + +#: ../../source/community/contributors.md +msgid "2026/01/05" +msgstr "2026/01/05" + +#: ../../source/community/contributors.md +msgid "" +"[6c1a685](https://github.com/vllm-project/vllm-" +"ascend/commit/6c1a685b30b9c75a6fee29c445e2a04229896694)" +msgstr "" +"[6c1a685](https://github.com/vllm-project/vllm-" +"ascend/commit/6c1a685b30b9c75a6fee29c445e2a04229896694)" + +#: ../../source/community/contributors.md +msgid "280" +msgstr "280" + +#: ../../source/community/contributors.md +msgid "[@L4-1024](https://github.com/L4-1024)" +msgstr "[@L4-1024](https://github.com/L4-1024)" + +#: ../../source/community/contributors.md +msgid "" +"[c23cf30](https://github.com/vllm-project/vllm-" +"ascend/commit/c23cf30709667f6fdde8d66957567933a590546a)" +msgstr "" +"[c23cf30](https://github.com/vllm-project/vllm-" +"ascend/commit/c23cf30709667f6fdde8d66957567933a590546a)" + +#: ../../source/community/contributors.md +msgid "279" +msgstr "279" + +#: ../../source/community/contributors.md 
+msgid "[@frankie-ys](https://github.com/frankie-ys)" +msgstr "[@frankie-ys](https://github.com/frankie-ys)" + +#: ../../source/community/contributors.md +msgid "" +"[ec35633](https://github.com/vllm-project/vllm-" +"ascend/commit/ec3563334b7f95925131ab758378341d23895863)" +msgstr "" +"[ec35633](https://github.com/vllm-project/vllm-" +"ascend/commit/ec3563334b7f95925131ab758378341d23895863)" + +#: ../../source/community/contributors.md +msgid "278" +msgstr "278" + +#: ../../source/community/contributors.md +msgid "[@Debonex](https://github.com/Debonex)" +msgstr "[@Debonex](https://github.com/Debonex)" + +#: ../../source/community/contributors.md +msgid "" +"[d86021f](https://github.com/vllm-project/vllm-" +"ascend/commit/d86021f7b4f86be6f853b03da82628b4d0d6ea4b)" +msgstr "" +"[d86021f](https://github.com/vllm-project/vllm-" +"ascend/commit/d86021f7b4f86be6f853b03da82628b4d0d6ea4b)" + +#: ../../source/community/contributors.md +msgid "277" +msgstr "277" + +#: ../../source/community/contributors.md +msgid "[@chuyuelin](https://github.com/chuyuelin)" +msgstr "[@chuyuelin](https://github.com/chuyuelin)" + +#: ../../source/community/contributors.md +msgid "2025/12/31" +msgstr "2025/12/31" + +#: ../../source/community/contributors.md +msgid "" +"[d07d8a4](https://github.com/vllm-project/vllm-" +"ascend/commit/d07d8a4535f499c7af95e1966b86700a693c8253)" +msgstr "" +"[d07d8a4](https://github.com/vllm-project/vllm-" +"ascend/commit/d07d8a4535f499c7af95e1966b86700a693c8253)" + +#: ../../source/community/contributors.md +msgid "276" +msgstr "276" + +#: ../../source/community/contributors.md +msgid "[@ZCG12345](https://github.com/ZCG12345)" +msgstr "[@ZCG12345](https://github.com/ZCG12345)" + +#: ../../source/community/contributors.md +msgid "2025/12/30" +msgstr "2025/12/30" + +#: ../../source/community/contributors.md +msgid "" +"[45c3c27](https://github.com/vllm-project/vllm-" +"ascend/commit/45c3c279e2b31c85c8739c45b43d8c47710e447b)" +msgstr "" +"[45c3c27](https://github.com/vllm-project/vllm-" +"ascend/commit/45c3c279e2b31c85c8739c45b43d8c47710e447b)" + +#: ../../source/community/contributors.md +msgid "275" +msgstr "275" + +#: ../../source/community/contributors.md +msgid "[@hu-qi](https://github.com/hu-qi)" +msgstr "[@hu-qi](https://github.com/hu-qi)" + +#: ../../source/community/contributors.md +msgid "" +"[c85cc04](https://github.com/vllm-project/vllm-" +"ascend/commit/c85cc045f893293e3b44e24d2e1f01ddc5849ea8)" +msgstr "" +"[c85cc04](https://github.com/vllm-project/vllm-" +"ascend/commit/c85cc045f893293e3b44e24d2e1f01ddc5849ea8)" + +#: ../../source/community/contributors.md +msgid "274" +msgstr "274" + +#: ../../source/community/contributors.md +msgid "[@zhanzy178](https://github.com/zhanzy178)" +msgstr "[@zhanzy178](https://github.com/zhanzy178)" + +#: ../../source/community/contributors.md +msgid "2025/12/29" +msgstr "2025/12/29" + +#: ../../source/community/contributors.md +msgid "" +"[d8e15da](https://github.com/vllm-project/vllm-" +"ascend/commit/d8e15dae6c5e563c3284309d4557afb4d4a17feb)" +msgstr "" +"[d8e15da](https://github.com/vllm-project/vllm-" +"ascend/commit/d8e15dae6c5e563c3284309d4557afb4d4a17feb)" + +#: ../../source/community/contributors.md +msgid "273" +msgstr "273" + +#: ../../source/community/contributors.md +msgid "[@jiazhengyi](https://github.com/jiazhengyi)" +msgstr "[@jiazhengyi](https://github.com/jiazhengyi)" + +#: ../../source/community/contributors.md +msgid "" +"[d5f7283](https://github.com/vllm-project/vllm-" 
+"ascend/commit/d5f72835e65e6b6b8cf8e173a9048c926890881b)" +msgstr "" +"[d5f7283](https://github.com/vllm-project/vllm-" +"ascend/commit/d5f72835e65e6b6b8cf8e173a9048c926890881b)" + +#: ../../source/community/contributors.md +msgid "272" +msgstr "272" + +#: ../../source/community/contributors.md +msgid "[@Fager10086](https://github.com/Fager10086)" +msgstr "[@Fager10086](https://github.com/Fager10086)" + +#: ../../source/community/contributors.md +msgid "" +"[51da5ea](https://github.com/vllm-project/vllm-" +"ascend/commit/51da5ea5438749fa057a7bb60977750e3ac22392)" +msgstr "" +"[51da5ea](https://github.com/vllm-project/vllm-" +"ascend/commit/51da5ea5438749fa057a7bb60977750e3ac22392)" + +#: ../../source/community/contributors.md +msgid "271" +msgstr "271" + +#: ../../source/community/contributors.md +msgid "[@jiangkuaixue123](https://github.com/jiangkuaixue123)" +msgstr "[@jiangkuaixue123](https://github.com/jiangkuaixue123)" + +#: ../../source/community/contributors.md +msgid "2025/12/27" +msgstr "2025/12/27" + +#: ../../source/community/contributors.md +msgid "" +"[e91e11d](https://github.com/vllm-project/vllm-" +"ascend/commit/e91e11d3b0a961f2e0e034cd738632653e5f6bdc)" +msgstr "" +"[e91e11d](https://github.com/vllm-project/vllm-" +"ascend/commit/e91e11d3b0a961f2e0e034cd738632653e5f6bdc)" + +#: ../../source/community/contributors.md +msgid "270" +msgstr "270" + +#: ../../source/community/contributors.md +msgid "[@maoxx241](https://github.com/maoxx241)" +msgstr "[@maoxx241](https://github.com/maoxx241)" + +#: ../../source/community/contributors.md +msgid "2025/12/26" +msgstr "2025/12/26" + +#: ../../source/community/contributors.md +msgid "" +"[7372225](https://github.com/vllm-project/vllm-" +"ascend/commit/7372225bcb0bd4896f43e989cef8109bfe45b13c)" +msgstr "" +"[7372225](https://github.com/vllm-project/vllm-" +"ascend/commit/7372225bcb0bd4896f43e989cef8109bfe45b13c)" + +#: ../../source/community/contributors.md +msgid "269" +msgstr "269" + +#: ../../source/community/contributors.md +msgid "[@wjunLu](https://github.com/wjunLu)" +msgstr "[@wjunLu](https://github.com/wjunLu)" + +#: ../../source/community/contributors.md +msgid "2025/12/25" +msgstr "2025/12/25" + +#: ../../source/community/contributors.md +msgid "" +"[fca2f94](https://github.com/vllm-project/vllm-" +"ascend/commit/fca2f948c101c9d520f1a381a6b705d9e11c042e)" +msgstr "" +"[fca2f94](https://github.com/vllm-project/vllm-" +"ascend/commit/fca2f948c101c9d520f1a381a6b705d9e11c042e)" + +#: ../../source/community/contributors.md +msgid "268" +msgstr "268" + +#: ../../source/community/contributors.md +msgid "[@cookieyyds](https://github.com/cookieyyds)" +msgstr "[@cookieyyds](https://github.com/cookieyyds)" + +#: ../../source/community/contributors.md +msgid "" +"[2da8038](https://github.com/vllm-project/vllm-" +"ascend/commit/2da8038dd2d30f813c00b37ba2cd9abeaaf3d0f3)" +msgstr "" +"[2da8038](https://github.com/vllm-project/vllm-" +"ascend/commit/2da8038dd2d30f813c00b37ba2cd9abeaaf3d0f3)" + +#: ../../source/community/contributors.md +msgid "267" +msgstr "267" + +#: ../../source/community/contributors.md +msgid "[@chenaoxuan](https://github.com/chenaoxuan)" +msgstr "[@chenaoxuan](https://github.com/chenaoxuan)" + +#: ../../source/community/contributors.md +msgid "" +"[6d25372](https://github.com/vllm-project/vllm-" +"ascend/commit/6d25372baaa0ef018a75b427b387fab8dd2e92b4)" +msgstr "" +"[6d25372](https://github.com/vllm-project/vllm-" +"ascend/commit/6d25372baaa0ef018a75b427b387fab8dd2e92b4)" + +#: ../../source/community/contributors.md 
+msgid "266" +msgstr "266" + +#: ../../source/community/contributors.md +msgid "[@changdawei1](https://github.com/changdawei1)" +msgstr "[@changdawei1](https://github.com/changdawei1)" + +#: ../../source/community/contributors.md +msgid "" +"[a9fccbe](https://github.com/vllm-project/vllm-" +"ascend/commit/a9fccbeb300fd4753f8e54aaddf02abf59d7429c)" +msgstr "" +"[a9fccbe](https://github.com/vllm-project/vllm-" +"ascend/commit/a9fccbeb300fd4753f8e54aaddf02abf59d7429c)" + +#: ../../source/community/contributors.md +msgid "265" +msgstr "265" + +#: ../../source/community/contributors.md +msgid "[@TmacAaron](https://github.com/TmacAaron)" +msgstr "[@TmacAaron](https://github.com/TmacAaron)" + +#: ../../source/community/contributors.md +msgid "2025/12/24" +msgstr "2025/12/24" + +#: ../../source/community/contributors.md +msgid "" +"[5018f2d](https://github.com/vllm-project/vllm-" +"ascend/commit/5018f2d8fd6dc11240900e38049150619895570d)" +msgstr "" +"[5018f2d](https://github.com/vllm-project/vllm-" +"ascend/commit/5018f2d8fd6dc11240900e38049150619895570d)" + +#: ../../source/community/contributors.md +msgid "264" +msgstr "264" + +#: ../../source/community/contributors.md +msgid "[@lengrongfu](https://github.com/lengrongfu)" +msgstr "[@lengrongfu](https://github.com/lengrongfu)" + +#: ../../source/community/contributors.md +msgid "2025/12/23" +msgstr "2025/12/23" + +#: ../../source/community/contributors.md +msgid "" +"[c9b5881](https://github.com/vllm-project/vllm-" +"ascend/commit/c9b5881bcd3a7b76d7b631471e4f3eae2ac545dd)" +msgstr "" +"[c9b5881](https://github.com/vllm-project/vllm-" +"ascend/commit/c9b5881bcd3a7b76d7b631471e4f3eae2ac545dd)" + +#: ../../source/community/contributors.md +msgid "263" +msgstr "263" + +#: ../../source/community/contributors.md +msgid "[@hzxuzhonghu](https://github.com/hzxuzhonghu)" +msgstr "[@hzxuzhonghu](https://github.com/hzxuzhonghu)" + +#: ../../source/community/contributors.md +msgid "" +"[cb963c5](https://github.com/vllm-project/vllm-" +"ascend/commit/cb963c53a58a4a7339f3507fcc087d83a0d7df7e)" +msgstr "" +"[cb963c5](https://github.com/vllm-project/vllm-" +"ascend/commit/cb963c53a58a4a7339f3507fcc087d83a0d7df7e)" + +#: ../../source/community/contributors.md +msgid "262" +msgstr "262" + +#: ../../source/community/contributors.md +msgid "[@pisceskkk](https://github.com/pisceskkk)" +msgstr "[@pisceskkk](https://github.com/pisceskkk)" + +#: ../../source/community/contributors.md +msgid "2025/12/22" +msgstr "2025/12/22" + +#: ../../source/community/contributors.md +msgid "" +"[ea6206b](https://github.com/vllm-project/vllm-" +"ascend/commit/ea6206bb183e555345e0fbc38c7cd0a1634fe21f)" +msgstr "" +"[ea6206b](https://github.com/vllm-project/vllm-" +"ascend/commit/ea6206bb183e555345e0fbc38c7cd0a1634fe21f)" + +#: ../../source/community/contributors.md +msgid "261" +msgstr "261" + +#: ../../source/community/contributors.md +msgid "[@OsirisDuan](https://github.com/OsirisDuan)" +msgstr "[@OsirisDuan](https://github.com/OsirisDuan)" + +#: ../../source/community/contributors.md +msgid "" +"[b2c1216](https://github.com/vllm-project/vllm-" +"ascend/commit/b2c121637fd8b8045e66e24ea0f63cb17ffb3b69)" +msgstr "" +"[b2c1216](https://github.com/vllm-project/vllm-" +"ascend/commit/b2c121637fd8b8045e66e24ea0f63cb17ffb3b69)" + +#: ../../source/community/contributors.md +msgid "260" +msgstr "260" + +#: ../../source/community/contributors.md +msgid "[@LJQ142857](https://github.com/LJQ142857)" +msgstr "[@LJQ142857](https://github.com/LJQ142857)" + +#: ../../source/community/contributors.md 
+msgid "" +"[55beac9](https://github.com/vllm-project/vllm-" +"ascend/commit/55beac9c91b8399cbe6425bd79f225b5dc72b243)" +msgstr "" +"[55beac9](https://github.com/vllm-project/vllm-" +"ascend/commit/55beac9c91b8399cbe6425bd79f225b5dc72b243)" + +#: ../../source/community/contributors.md +msgid "259" +msgstr "259" + +#: ../../source/community/contributors.md +msgid "[@YuhanBai](https://github.com/YuhanBai)" +msgstr "[@YuhanBai](https://github.com/YuhanBai)" + +#: ../../source/community/contributors.md +msgid "2025/12/20" +msgstr "2025/12/20" + +#: ../../source/community/contributors.md +msgid "" +"[5d02eed](https://github.com/vllm-project/vllm-" +"ascend/commit/5d02eed16f6e4e5187259583f9db16a916095ab3)" +msgstr "" +"[5d02eed](https://github.com/vllm-project/vllm-" +"ascend/commit/5d02eed16f6e4e5187259583f9db16a916095ab3)" + +#: ../../source/community/contributors.md +msgid "258" +msgstr "258" + +#: ../../source/community/contributors.md +msgid "[@luluxiu520](https://github.com/luluxiu520)" +msgstr "[@luluxiu520](https://github.com/luluxiu520)" + +#: ../../source/community/contributors.md +msgid "2025/12/19" +msgstr "2025/12/19" + +#: ../../source/community/contributors.md +msgid "" +"[bc05a81](https://github.com/vllm-project/vllm-" +"ascend/commit/bc05a81bf2d68dc45d9c6115e5d4f3191a528891)" +msgstr "" +"[bc05a81](https://github.com/vllm-project/vllm-" +"ascend/commit/bc05a81bf2d68dc45d9c6115e5d4f3191a528891)" + +#: ../../source/community/contributors.md +msgid "257" +msgstr "257" + +#: ../../source/community/contributors.md +msgid "[@hukongyi](https://github.com/hukongyi)" +msgstr "[@hukongyi](https://github.com/hukongyi)" + +#: ../../source/community/contributors.md +msgid "" +"[ea8f544](https://github.com/vllm-project/vllm-" +"ascend/commit/ea8f544ce73708aef8c9d48a2916b64fa0e09806)" +msgstr "" +"[ea8f544](https://github.com/vllm-project/vllm-" +"ascend/commit/ea8f544ce73708aef8c9d48a2916b64fa0e09806)" + +#: ../../source/community/contributors.md +msgid "256" +msgstr "256" + +#: ../../source/community/contributors.md +msgid "[@ZT-AIA](https://github.com/ZT-AIA)" +msgstr "[@ZT-AIA](https://github.com/ZT-AIA)" + +#: ../../source/community/contributors.md +msgid "2025/12/18" +msgstr "2025/12/18" + +#: ../../source/community/contributors.md +msgid "" +"[39fb9e7](https://github.com/vllm-project/vllm-" +"ascend/commit/39fb9e7c8324c48f967a5f38d77b3e1225ecec2f)" +msgstr "" +"[39fb9e7](https://github.com/vllm-project/vllm-" +"ascend/commit/39fb9e7c8324c48f967a5f38d77b3e1225ecec2f)" + +#: ../../source/community/contributors.md +msgid "255" +msgstr "255" + +#: ../../source/community/contributors.md +msgid "[@yuxinshan](https://github.com/yuxinshan)" +msgstr "[@yuxinshan](https://github.com/yuxinshan)" + +#: ../../source/community/contributors.md +msgid "" +"[b0376ab](https://github.com/vllm-project/vllm-" +"ascend/commit/b0376abd4c8d535953208c1b38c646ac7e8b31f4)" +msgstr "" +"[b0376ab](https://github.com/vllm-project/vllm-" +"ascend/commit/b0376abd4c8d535953208c1b38c646ac7e8b31f4)" + +#: ../../source/community/contributors.md +msgid "254" +msgstr "254" + +#: ../../source/community/contributors.md +msgid "[@yuxingcyx](https://github.com/yuxingcyx)" +msgstr "[@yuxingcyx](https://github.com/yuxingcyx)" + +#: ../../source/community/contributors.md +msgid "" +"[5a88e33](https://github.com/vllm-project/vllm-" +"ascend/commit/5a88e3333bea5d4c948e14447d2d9e4ca8963e4d)" +msgstr "" +"[5a88e33](https://github.com/vllm-project/vllm-" +"ascend/commit/5a88e3333bea5d4c948e14447d2d9e4ca8963e4d)" + +#: 
../../source/community/contributors.md +msgid "253" +msgstr "253" + +#: ../../source/community/contributors.md +msgid "[@TingW09](https://github.com/TingW09)" +msgstr "[@TingW09](https://github.com/TingW09)" + +#: ../../source/community/contributors.md +msgid "" +"[879ec2d](https://github.com/vllm-project/vllm-" +"ascend/commit/879ec2d1c4267f6a927ac6d3d2d835fe37e89f80)" +msgstr "" +"[879ec2d](https://github.com/vllm-project/vllm-" +"ascend/commit/879ec2d1c4267f6a927ac6d3d2d835fe37e89f80)" + +#: ../../source/community/contributors.md +msgid "252" +msgstr "252" + +#: ../../source/community/contributors.md +msgid "[@LICO1314](https://github.com/LICO1314)" +msgstr "[@LICO1314](https://github.com/LICO1314)" + +#: ../../source/community/contributors.md +msgid "" +"[9fcaf66](https://github.com/vllm-project/vllm-" +"ascend/commit/9fcaf66646d15671ab4d9bcf3530cccfcc6b2675)" +msgstr "" +"[9fcaf66](https://github.com/vllm-project/vllm-" +"ascend/commit/9fcaf66646d15671ab4d9bcf3530cccfcc6b2675)" + +#: ../../source/community/contributors.md +msgid "251" +msgstr "251" + +#: ../../source/community/contributors.md +msgid "[@YzTongNiar](https://github.com/YzTongNiar)" +msgstr "[@YzTongNiar](https://github.com/YzTongNiar)" + +#: ../../source/community/contributors.md +msgid "2025/12/17" +msgstr "2025/12/17" + +#: ../../source/community/contributors.md +msgid "" +"[7671ce1](https://github.com/vllm-project/vllm-" +"ascend/commit/7671ce1bf12b3df45aaa92c6316b40d8f954230a)" +msgstr "" +"[7671ce1](https://github.com/vllm-project/vllm-" +"ascend/commit/7671ce1bf12b3df45aaa92c6316b40d8f954230a)" + +#: ../../source/community/contributors.md +msgid "250" +msgstr "250" + +#: ../../source/community/contributors.md +msgid "[@Toneymiller](https://github.com/Toneymiller)" +msgstr "[@Toneymiller](https://github.com/Toneymiller)" + +#: ../../source/community/contributors.md +msgid "" +"[b1a853b](https://github.com/vllm-project/vllm-" +"ascend/commit/b1a853b0f64961ac9145fd4a510e194fb152992e)" +msgstr "" +"[b1a853b](https://github.com/vllm-project/vllm-" +"ascend/commit/b1a853b0f64961ac9145fd4a510e194fb152992e)" + +#: ../../source/community/contributors.md +msgid "249" +msgstr "249" + +#: ../../source/community/contributors.md +msgid "[@JeffLee1874](https://github.com/JeffLee1874)" +msgstr "[@JeffLee1874](https://github.com/JeffLee1874)" + +#: ../../source/community/contributors.md +msgid "" +"[724d043](https://github.com/vllm-project/vllm-" +"ascend/commit/724d04391e89408f3fc3fb0cef13e5f4cf155dde)" +msgstr "" +"[724d043](https://github.com/vllm-project/vllm-" +"ascend/commit/724d04391e89408f3fc3fb0cef13e5f4cf155dde)" + +#: ../../source/community/contributors.md +msgid "248" +msgstr "248" + +#: ../../source/community/contributors.md +msgid "[@ader47](https://github.com/ader47)" +msgstr "[@ader47](https://github.com/ader47)" + +#: ../../source/community/contributors.md +msgid "" +"[eda3cab](https://github.com/vllm-project/vllm-" +"ascend/commit/eda3cabf5b0de9e0e435328e666614f5320e657c)" +msgstr "" +"[eda3cab](https://github.com/vllm-project/vllm-" +"ascend/commit/eda3cabf5b0de9e0e435328e666614f5320e657c)" + +#: ../../source/community/contributors.md +msgid "247" +msgstr "247" + +#: ../../source/community/contributors.md +msgid "[@UnifiedCacheManager](https://github.com/UnifiedCacheManager)" +msgstr "[@UnifiedCacheManager](https://github.com/UnifiedCacheManager)" + +#: ../../source/community/contributors.md +msgid "2025/12/16" +msgstr "2025/12/16" + +#: ../../source/community/contributors.md +msgid "" 
+"[195eac6](https://github.com/vllm-project/vllm-" +"ascend/commit/195eac665b2d42b8287c59128490586c6931d54c)" +msgstr "" +"[195eac6](https://github.com/vllm-project/vllm-" +"ascend/commit/195eac665b2d42b8287c59128490586c6931d54c)" + +#: ../../source/community/contributors.md +msgid "246" +msgstr "246" + +#: ../../source/community/contributors.md +msgid "[@ming1212](https://github.com/ming1212)" +msgstr "[@ming1212](https://github.com/ming1212)" + +#: ../../source/community/contributors.md +msgid "2025/12/15" +msgstr "2025/12/15" + +#: ../../source/community/contributors.md +msgid "" +"[98b9e2e](https://github.com/vllm-project/vllm-" +"ascend/commit/98b9e2e18e8f8e9f1495eafc8f57e3e1791e0a34)" +msgstr "" +"[98b9e2e](https://github.com/vllm-project/vllm-" +"ascend/commit/98b9e2e18e8f8e9f1495eafc8f57e3e1791e0a34)" + +#: ../../source/community/contributors.md +msgid "245" +msgstr "245" + +#: ../../source/community/contributors.md +msgid "[@knight0528](https://github.com/knight0528)" +msgstr "[@knight0528](https://github.com/knight0528)" + +#: ../../source/community/contributors.md +msgid "" +"[e25c57b](https://github.com/vllm-project/vllm-" +"ascend/commit/e25c57b3461b8116d533724013fbc9957716f92d)" +msgstr "" +"[e25c57b](https://github.com/vllm-project/vllm-" +"ascend/commit/e25c57b3461b8116d533724013fbc9957716f92d)" + +#: ../../source/community/contributors.md +msgid "244" +msgstr "244" + +#: ../../source/community/contributors.md +msgid "[@wangyao-i](https://github.com/wangyao-i)" +msgstr "[@wangyao-i](https://github.com/wangyao-i)" + +#: ../../source/community/contributors.md +msgid "2025/12/12" +msgstr "2025/12/12" + +#: ../../source/community/contributors.md +msgid "" +"[0983c55](https://github.com/vllm-project/vllm-" +"ascend/commit/0983c5510aa49c7310b79db72657d8a0f92918ec)" +msgstr "" +"[0983c55](https://github.com/vllm-project/vllm-" +"ascend/commit/0983c5510aa49c7310b79db72657d8a0f92918ec)" + +#: ../../source/community/contributors.md +msgid "243" +msgstr "243" + +#: ../../source/community/contributors.md +msgid "[@sunchendd](https://github.com/sunchendd)" +msgstr "[@sunchendd](https://github.com/sunchendd)" + +#: ../../source/community/contributors.md +msgid "" +"[5932abc](https://github.com/vllm-project/vllm-" +"ascend/commit/5932abc44686cc0d3fa1c4ce9fdbc30957b8aa51)" +msgstr "" +"[5932abc](https://github.com/vllm-project/vllm-" +"ascend/commit/5932abc44686cc0d3fa1c4ce9fdbc30957b8aa51)" + +#: ../../source/community/contributors.md +msgid "242" +msgstr "242" + +#: ../../source/community/contributors.md +msgid "[@yangshihao6](https://github.com/yangshihao6)" +msgstr "[@yangshihao6](https://github.com/yangshihao6)" + +#: ../../source/community/contributors.md +msgid "2025/12/11" +msgstr "2025/12/11" + +#: ../../source/community/contributors.md +msgid "" +"[e1bb6f4](https://github.com/vllm-project/vllm-" +"ascend/commit/e1bb6f47ecabcb382ff3b4dfbd168d0943ce504f)" +msgstr "" +"[e1bb6f4](https://github.com/vllm-project/vllm-" +"ascend/commit/e1bb6f47ecabcb382ff3b4dfbd168d0943ce504f)" + +#: ../../source/community/contributors.md +msgid "241" +msgstr "241" + +#: ../../source/community/contributors.md +msgid "[@xlan-huawei](https://github.com/xlan-huawei)" +msgstr "[@xlan-huawei](https://github.com/xlan-huawei)" + +#: ../../source/community/contributors.md +msgid "" +"[5ebb9bd](https://github.com/vllm-project/vllm-" +"ascend/commit/5ebb9bd8d26395e86f9bd01183a327477a966760)" +msgstr "" +"[5ebb9bd](https://github.com/vllm-project/vllm-" +"ascend/commit/5ebb9bd8d26395e86f9bd01183a327477a966760)" + 
+#: ../../source/community/contributors.md +msgid "240" +msgstr "240" + +#: ../../source/community/contributors.md +msgid "[@hwhaokun](https://github.com/hwhaokun)" +msgstr "[@hwhaokun](https://github.com/hwhaokun)" + +#: ../../source/community/contributors.md +msgid "" +"[a47aa4d](https://github.com/vllm-project/vllm-" +"ascend/commit/a47aa4da2f87dfd2792ef13d31476eccd27685bf)" +msgstr "" +"[a47aa4d](https://github.com/vllm-project/vllm-" +"ascend/commit/a47aa4da2f87dfd2792ef13d31476eccd27685bf)" + +#: ../../source/community/contributors.md +msgid "239" +msgstr "239" + +#: ../../source/community/contributors.md +msgid "[@Gongdayao](https://github.com/Gongdayao)" +msgstr "[@Gongdayao](https://github.com/Gongdayao)" + +#: ../../source/community/contributors.md +msgid "" +"[89a8607](https://github.com/vllm-project/vllm-" +"ascend/commit/89a8607b30ca4f123ae405df42701077b776e4af)" +msgstr "" +"[89a8607](https://github.com/vllm-project/vllm-" +"ascend/commit/89a8607b30ca4f123ae405df42701077b776e4af)" + +#: ../../source/community/contributors.md +msgid "238" +msgstr "238" + +#: ../../source/community/contributors.md +msgid "[@cjy0x](https://github.com/cjy0x)" +msgstr "[@cjy0x](https://github.com/cjy0x)" + +#: ../../source/community/contributors.md +msgid "" +"[c12eb22](https://github.com/vllm-project/vllm-" +"ascend/commit/c12eb22cbe9f55ce72fca3d8231c06d0173e22fb)" +msgstr "" +"[c12eb22](https://github.com/vllm-project/vllm-" +"ascend/commit/c12eb22cbe9f55ce72fca3d8231c06d0173e22fb)" + +#: ../../source/community/contributors.md +msgid "237" +msgstr "237" + +#: ../../source/community/contributors.md +msgid "[@wind-all](https://github.com/wind-all)" +msgstr "[@wind-all](https://github.com/wind-all)" + +#: ../../source/community/contributors.md +msgid "2025/12/10" +msgstr "2025/12/10" + +#: ../../source/community/contributors.md +msgid "" +"[1a443f2](https://github.com/vllm-project/vllm-" +"ascend/commit/1a443f2772ba9a75288e5dc09227ee6bdd54c147)" +msgstr "" +"[1a443f2](https://github.com/vllm-project/vllm-" +"ascend/commit/1a443f2772ba9a75288e5dc09227ee6bdd54c147)" + +#: ../../source/community/contributors.md +msgid "236" +msgstr "236" + +#: ../../source/community/contributors.md +msgid "[@Trunrain](https://github.com/Trunrain)" +msgstr "[@Trunrain](https://github.com/Trunrain)" + +#: ../../source/community/contributors.md +msgid "" +"[ba9cda9](https://github.com/vllm-project/vllm-" +"ascend/commit/ba9cda9dfd83765a7e79a074c033818ff5a76bd6)" +msgstr "" +"[ba9cda9](https://github.com/vllm-project/vllm-" +"ascend/commit/ba9cda9dfd83765a7e79a074c033818ff5a76bd6)" + +#: ../../source/community/contributors.md +msgid "235" +msgstr "235" + +#: ../../source/community/contributors.md +msgid "[@ghphotoframe](https://github.com/ghphotoframe)" +msgstr "[@ghphotoframe](https://github.com/ghphotoframe)" + +#: ../../source/community/contributors.md +msgid "" +"[0d8c0f1](https://github.com/vllm-project/vllm-" +"ascend/commit/0d8c0f1a24a58011908b17994fffb5fcfebe5e22)" +msgstr "" +"[0d8c0f1](https://github.com/vllm-project/vllm-" +"ascend/commit/0d8c0f1a24a58011908b17994fffb5fcfebe5e22)" + +#: ../../source/community/contributors.md +msgid "234" +msgstr "234" + +#: ../../source/community/contributors.md +msgid "[@ChenCangtao](https://github.com/ChenCangtao)" +msgstr "[@ChenCangtao](https://github.com/ChenCangtao)" + +#: ../../source/community/contributors.md +msgid "" +"[dd622aa](https://github.com/vllm-project/vllm-" +"ascend/commit/dd622aa6a6ea34a6bc799d52171ebe6c3e062972)" +msgstr "" 
+"[dd622aa](https://github.com/vllm-project/vllm-" +"ascend/commit/dd622aa6a6ea34a6bc799d52171ebe6c3e062972)" + +#: ../../source/community/contributors.md +msgid "233" +msgstr "233" + +#: ../../source/community/contributors.md +msgid "[@wangqiankun13](https://github.com/wangqiankun13)" +msgstr "[@wangqiankun13](https://github.com/wangqiankun13)" + +#: ../../source/community/contributors.md +msgid "2025/12/09" +msgstr "2025/12/09" + +#: ../../source/community/contributors.md +msgid "" +"[9567e5d](https://github.com/vllm-project/vllm-" +"ascend/commit/9567e5dd8c61d2e62b3572c9e98dc4f9eaf51c2a)" +msgstr "" +"[9567e5d](https://github.com/vllm-project/vllm-" +"ascend/commit/9567e5dd8c61d2e62b3572c9e98dc4f9eaf51c2a)" + +#: ../../source/community/contributors.md +msgid "232" +msgstr "232" + +#: ../../source/community/contributors.md +msgid "[@zengzengran](https://github.com/zengzengran)" +msgstr "[@zengzengran](https://github.com/zengzengran)" + +#: ../../source/community/contributors.md +msgid "2025/12/08" +msgstr "2025/12/08" + +#: ../../source/community/contributors.md +msgid "" +"[f0876b5](https://github.com/vllm-project/vllm-" +"ascend/commit/f0876b5d88a04734505bb5cea7bda95b026923ff)" +msgstr "" +"[f0876b5](https://github.com/vllm-project/vllm-" +"ascend/commit/f0876b5d88a04734505bb5cea7bda95b026923ff)" + +#: ../../source/community/contributors.md +msgid "231" +msgstr "231" + +#: ../../source/community/contributors.md +msgid "[@shiro-zzzz](https://github.com/shiro-zzzz)" +msgstr "[@shiro-zzzz](https://github.com/shiro-zzzz)" + +#: ../../source/community/contributors.md +msgid "" +"[0617d7d](https://github.com/vllm-project/vllm-" +"ascend/commit/0617d7d394a9b6aa47a75f368d6059b1e5788f06)" +msgstr "" +"[0617d7d](https://github.com/vllm-project/vllm-" +"ascend/commit/0617d7d394a9b6aa47a75f368d6059b1e5788f06)" + +#: ../../source/community/contributors.md +msgid "230" +msgstr "230" + +#: ../../source/community/contributors.md +msgid "[@lulina](https://github.com/lulina)" +msgstr "[@lulina](https://github.com/lulina)" + +#: ../../source/community/contributors.md +msgid "" +"[2be0fe2](https://github.com/vllm-project/vllm-" +"ascend/commit/2be0fe269160e97dc9449101e92a98e7f3436668)" +msgstr "" +"[2be0fe2](https://github.com/vllm-project/vllm-" +"ascend/commit/2be0fe269160e97dc9449101e92a98e7f3436668)" + +#: ../../source/community/contributors.md +msgid "229" +msgstr "229" + +#: ../../source/community/contributors.md +msgid "[@lhp-deep](https://github.com/lhp-deep)" +msgstr "[@lhp-deep](https://github.com/lhp-deep)" + +#: ../../source/community/contributors.md +msgid "" +"[b230e7e](https://github.com/vllm-project/vllm-" +"ascend/commit/b230e7e987da602a3985a4f297b01decfc1ec40f)" +msgstr "" +"[b230e7e](https://github.com/vllm-project/vllm-" +"ascend/commit/b230e7e987da602a3985a4f297b01decfc1ec40f)" + +#: ../../source/community/contributors.md +msgid "228" +msgstr "228" + +#: ../../source/community/contributors.md +msgid "[@h1074112368](https://github.com/h1074112368)" +msgstr "[@h1074112368](https://github.com/h1074112368)" + +#: ../../source/community/contributors.md +msgid "2025/12/06" +msgstr "2025/12/06" + +#: ../../source/community/contributors.md +msgid "" +"[7403399](https://github.com/vllm-project/vllm-" +"ascend/commit/74033999ed98fcf02db0a2512d019177d0843791)" +msgstr "" +"[7403399](https://github.com/vllm-project/vllm-" +"ascend/commit/74033999ed98fcf02db0a2512d019177d0843791)" + +#: ../../source/community/contributors.md +msgid "227" +msgstr "227" + +#: ../../source/community/contributors.md 
+msgid "[@GuoRen868](https://github.com/GuoRen868)" +msgstr "[@GuoRen868](https://github.com/GuoRen868)" + +#: ../../source/community/contributors.md +msgid "" +"[4bd1030](https://github.com/vllm-project/vllm-" +"ascend/commit/4bd1030842a3831109ccfb136056459cd83c1441)" +msgstr "" +"[4bd1030](https://github.com/vllm-project/vllm-" +"ascend/commit/4bd1030842a3831109ccfb136056459cd83c1441)" + +#: ../../source/community/contributors.md +msgid "226" +msgstr "226" + +#: ../../source/community/contributors.md +msgid "[@AlvisGong](https://github.com/AlvisGong)" +msgstr "[@AlvisGong](https://github.com/AlvisGong)" + +#: ../../source/community/contributors.md +msgid "" +"[a5163c8](https://github.com/vllm-project/vllm-" +"ascend/commit/a5163c8c369e046c53e6127bbbcee23392cec069)" +msgstr "" +"[a5163c8](https://github.com/vllm-project/vllm-" +"ascend/commit/a5163c8c369e046c53e6127bbbcee23392cec069)" + +#: ../../source/community/contributors.md +msgid "225" +msgstr "225" + +#: ../../source/community/contributors.md +msgid "[@MingYang119](https://github.com/MingYang119)" +msgstr "[@MingYang119](https://github.com/MingYang119)" + +#: ../../source/community/contributors.md +msgid "2025/12/03" +msgstr "2025/12/03" + +#: ../../source/community/contributors.md +msgid "" +"[18b90b5](https://github.com/vllm-project/vllm-" +"ascend/commit/18b90b501d6aad1d9426dcdee1ccfbe8139dd47d)" +msgstr "" +"[18b90b5](https://github.com/vllm-project/vllm-" +"ascend/commit/18b90b501d6aad1d9426dcdee1ccfbe8139dd47d)" + +#: ../../source/community/contributors.md +msgid "224" +msgstr "224" + +#: ../../source/community/contributors.md +msgid "[@amy-why-3459](https://github.com/amy-why-3459)" +msgstr "[@amy-why-3459](https://github.com/amy-why-3459)" + +#: ../../source/community/contributors.md +msgid "" +"[26e8e58](https://github.com/vllm-project/vllm-" +"ascend/commit/26e8e58cea8ba5cc5edaef19e72d0ddc4e9f1c1c)" +msgstr "" +"[26e8e58](https://github.com/vllm-project/vllm-" +"ascend/commit/26e8e58cea8ba5cc5edaef19e72d0ddc4e9f1c1c)" + +#: ../../source/community/contributors.md +msgid "223" +msgstr "223" + +#: ../../source/community/contributors.md +msgid "[@coder-fny](https://github.com/coder-fny)" +msgstr "[@coder-fny](https://github.com/coder-fny)" + +#: ../../source/community/contributors.md +msgid "2025/12/02" +msgstr "2025/12/02" + +#: ../../source/community/contributors.md +msgid "" +"[1b5513a](https://github.com/vllm-project/vllm-" +"ascend/commit/1b5513aa914aa37cb539d410faf6e021e6dc5481)" +msgstr "" +"[1b5513a](https://github.com/vllm-project/vllm-" +"ascend/commit/1b5513aa914aa37cb539d410faf6e021e6dc5481)" + +#: ../../source/community/contributors.md +msgid "222" +msgstr "222" + +#: ../../source/community/contributors.md +msgid "[@swy20190](https://github.com/swy20190)" +msgstr "[@swy20190](https://github.com/swy20190)" + +#: ../../source/community/contributors.md +msgid "2025/12/01" +msgstr "2025/12/01" + +#: ../../source/community/contributors.md +msgid "" +"[f4871c6](https://github.com/vllm-project/vllm-" +"ascend/commit/f4871c6ab98af9ab2766779b2167de04805f38e2)" +msgstr "" +"[f4871c6](https://github.com/vllm-project/vllm-" +"ascend/commit/f4871c6ab98af9ab2766779b2167de04805f38e2)" + +#: ../../source/community/contributors.md +msgid "221" +msgstr "221" + +#: ../../source/community/contributors.md +msgid "[@fluctlux](https://github.com/fluctlux)" +msgstr "[@fluctlux](https://github.com/fluctlux)" + +#: ../../source/community/contributors.md +msgid "" +"[f1f6370](https://github.com/vllm-project/vllm-" 
+"ascend/commit/f1f6370ed966bd554f8874ddc430c09f25354c72)" +msgstr "" +"[f1f6370](https://github.com/vllm-project/vllm-" +"ascend/commit/f1f6370ed966bd554f8874ddc430c09f25354c72)" + +#: ../../source/community/contributors.md +msgid "220" +msgstr "220" + +#: ../../source/community/contributors.md +msgid "[@LHXuuu](https://github.com/LHXuuu)" +msgstr "[@LHXuuu](https://github.com/LHXuuu)" + +#: ../../source/community/contributors.md +msgid "2025/11/28" +msgstr "2025/11/28" + +#: ../../source/community/contributors.md +msgid "" +"[bdc6697](https://github.com/vllm-project/vllm-" +"ascend/commit/bdc66972db12cc9f4b63721495fb2b103942dd25)" +msgstr "" +"[bdc6697](https://github.com/vllm-project/vllm-" +"ascend/commit/bdc66972db12cc9f4b63721495fb2b103942dd25)" + +#: ../../source/community/contributors.md +msgid "219" +msgstr "219" + +#: ../../source/community/contributors.md +msgid "[@ChenxiQ](https://github.com/ChenxiQ)" +msgstr "[@ChenxiQ](https://github.com/ChenxiQ)" + +#: ../../source/community/contributors.md +msgid "" +"[554f16a](https://github.com/vllm-project/vllm-" +"ascend/commit/554f16ae1fb89b35ad82b24e7f7fe5eaba0e80d0)" +msgstr "" +"[554f16a](https://github.com/vllm-project/vllm-" +"ascend/commit/554f16ae1fb89b35ad82b24e7f7fe5eaba0e80d0)" + +#: ../../source/community/contributors.md +msgid "218" +msgstr "218" + +#: ../../source/community/contributors.md +msgid "[@zjchenn](https://github.com/zjchenn)" +msgstr "[@zjchenn](https://github.com/zjchenn)" + +#: ../../source/community/contributors.md +msgid "2025/11/25" +msgstr "2025/11/25" + +#: ../../source/community/contributors.md +msgid "" +"[463910e](https://github.com/vllm-project/vllm-" +"ascend/commit/463910e686013acc611a7e3ccb70c3a776c8ffef)" +msgstr "" +"[463910e](https://github.com/vllm-project/vllm-" +"ascend/commit/463910e686013acc611a7e3ccb70c3a776c8ffef)" + +#: ../../source/community/contributors.md +msgid "217" +msgstr "217" + +#: ../../source/community/contributors.md +msgid "[@wujinyuan1](https://github.com/wujinyuan1)" +msgstr "[@wujinyuan1](https://github.com/wujinyuan1)" + +#: ../../source/community/contributors.md +msgid "" +"[386a85e](https://github.com/vllm-project/vllm-" +"ascend/commit/386a85eccc9a2247180faad2f8a00ad9c0afa4b1)" +msgstr "" +"[386a85e](https://github.com/vllm-project/vllm-" +"ascend/commit/386a85eccc9a2247180faad2f8a00ad9c0afa4b1)" + +#: ../../source/community/contributors.md +msgid "216" +msgstr "216" + +#: ../../source/community/contributors.md +msgid "[@Tjh-UKN](https://github.com/Tjh-UKN)" +msgstr "[@Tjh-UKN](https://github.com/Tjh-UKN)" + +#: ../../source/community/contributors.md +msgid "2025/11/24" +msgstr "2025/11/24" + +#: ../../source/community/contributors.md +msgid "" +"[00ea61e](https://github.com/vllm-project/vllm-" +"ascend/commit/00ea61ec885e21ed0e51dc8e751cb27cfa539dc3)" +msgstr "" +"[00ea61e](https://github.com/vllm-project/vllm-" +"ascend/commit/00ea61ec885e21ed0e51dc8e751cb27cfa539dc3)" + +#: ../../source/community/contributors.md +msgid "215" +msgstr "215" + +#: ../../source/community/contributors.md +msgid "[@mazhixin000](https://github.com/mazhixin000)" +msgstr "[@mazhixin000](https://github.com/mazhixin000)" + +#: ../../source/community/contributors.md +msgid "2025/11/22" +msgstr "2025/11/22" + +#: ../../source/community/contributors.md +msgid "" +"[ab51fce](https://github.com/vllm-project/vllm-" +"ascend/commit/ab51fcea4ca77fdb642a04018df60e18586d52ef)" +msgstr "" +"[ab51fce](https://github.com/vllm-project/vllm-" +"ascend/commit/ab51fcea4ca77fdb642a04018df60e18586d52ef)" + +#: 
../../source/community/contributors.md +msgid "214" +msgstr "214" + +#: ../../source/community/contributors.md +msgid "[@shenchuxiaofugui](https://github.com/shenchuxiaofugui)" +msgstr "[@shenchuxiaofugui](https://github.com/shenchuxiaofugui)" + +#: ../../source/community/contributors.md +msgid "2025/11/21" +msgstr "2025/11/21" + +#: ../../source/community/contributors.md +msgid "" +"[019c7de](https://github.com/vllm-project/vllm-" +"ascend/commit/019c7ded91f0c2a9533ace8d87f0cf56e396f979)" +msgstr "" +"[019c7de](https://github.com/vllm-project/vllm-" +"ascend/commit/019c7ded91f0c2a9533ace8d87f0cf56e396f979)" + +#: ../../source/community/contributors.md +msgid "213" +msgstr "213" + +#: ../../source/community/contributors.md +msgid "[@InSec](https://github.com/InSec)" +msgstr "[@InSec](https://github.com/InSec)" + +#: ../../source/community/contributors.md +msgid "" +"[5a4e8cd](https://github.com/vllm-project/vllm-" +"ascend/commit/5a4e8cdebabd8293e9ff61b7014d758b71ebf32a)" +msgstr "" +"[5a4e8cd](https://github.com/vllm-project/vllm-" +"ascend/commit/5a4e8cdebabd8293e9ff61b7014d758b71ebf32a)" + +#: ../../source/community/contributors.md +msgid "212" +msgstr "212" + +#: ../../source/community/contributors.md +msgid "[@Delphine-Nic](https://github.com/Delphine-Nic)" +msgstr "[@Delphine-Nic](https://github.com/Delphine-Nic)" + +#: ../../source/community/contributors.md +msgid "2025/11/19" +msgstr "2025/11/19" + +#: ../../source/community/contributors.md +msgid "" +"[a3e9673](https://github.com/vllm-project/vllm-" +"ascend/commit/a3e9673137dd31d77b7441d657a0fc394ac7bc59)" +msgstr "" +"[a3e9673](https://github.com/vllm-project/vllm-" +"ascend/commit/a3e9673137dd31d77b7441d657a0fc394ac7bc59)" + +#: ../../source/community/contributors.md +msgid "211" +msgstr "211" + +#: ../../source/community/contributors.md +msgid "[@wangxiaochao6](https://github.com/wangxiaochao6)" +msgstr "[@wangxiaochao6](https://github.com/wangxiaochao6)" + +#: ../../source/community/contributors.md +msgid "2025/11/18" +msgstr "2025/11/18" + +#: ../../source/community/contributors.md +msgid "" +"[0d04ad8](https://github.com/vllm-project/vllm-" +"ascend/commit/0d04ad8c8f00081932566576da1e6d1dcd963d8d)" +msgstr "" +"[0d04ad8](https://github.com/vllm-project/vllm-" +"ascend/commit/0d04ad8c8f00081932566576da1e6d1dcd963d8d)" + +#: ../../source/community/contributors.md +msgid "210" +msgstr "210" + +#: ../../source/community/contributors.md +msgid "[@845473182](https://github.com/845473182)" +msgstr "[@845473182](https://github.com/845473182)" + +#: ../../source/community/contributors.md +msgid "2025/11/14" +msgstr "2025/11/14" + +#: ../../source/community/contributors.md +msgid "" +"[f90ed95](https://github.com/vllm-project/vllm-" +"ascend/commit/f90ed95578f81ddd1cb96c9bd5d6ef8be8475667)" +msgstr "" +"[f90ed95](https://github.com/vllm-project/vllm-" +"ascend/commit/f90ed95578f81ddd1cb96c9bd5d6ef8be8475667)" + +#: ../../source/community/contributors.md +msgid "209" +msgstr "209" + +#: ../../source/community/contributors.md +msgid "[@thonean](https://github.com/thonean)" +msgstr "[@thonean](https://github.com/thonean)" + +#: ../../source/community/contributors.md +msgid "2025/11/12" +msgstr "2025/11/12" + +#: ../../source/community/contributors.md +msgid "" +"[e38fe92](https://github.com/vllm-project/vllm-" +"ascend/commit/e38fe92f4037fe668f34b23c67daa9695a3fc3c7)" +msgstr "" +"[e38fe92](https://github.com/vllm-project/vllm-" +"ascend/commit/e38fe92f4037fe668f34b23c67daa9695a3fc3c7)" + +#: ../../source/community/contributors.md 
+msgid "208" +msgstr "208" + +#: ../../source/community/contributors.md +msgid "[@zhaomingyu13](https://github.com/zhaomingyu13)" +msgstr "[@zhaomingyu13](https://github.com/zhaomingyu13)" + +#: ../../source/community/contributors.md +msgid "2025/11/11" +msgstr "2025/11/11" + +#: ../../source/community/contributors.md +msgid "" +"[7ffbe73](https://github.com/vllm-project/vllm-" +"ascend/commit/7ffbe73d54d7257c571ddd21bac6543b5ead0dac)" +msgstr "" +"[7ffbe73](https://github.com/vllm-project/vllm-" +"ascend/commit/7ffbe73d54d7257c571ddd21bac6543b5ead0dac)" + +#: ../../source/community/contributors.md +msgid "207" +msgstr "207" + +#: ../../source/community/contributors.md +msgid "[@Copilot](https://github.com/Copilot)" +msgstr "[@Copilot](https://github.com/Copilot)" + +#: ../../source/community/contributors.md +msgid "" +"[24bca67](https://github.com/vllm-project/vllm-" +"ascend/commit/24bca674412b56418c94bda7d659105315505a8e)" +msgstr "" +"[24bca67](https://github.com/vllm-project/vllm-" +"ascend/commit/24bca674412b56418c94bda7d659105315505a8e)" + +#: ../../source/community/contributors.md +msgid "206" +msgstr "206" + +#: ../../source/community/contributors.md +msgid "[@Apocalypse990923-qshi](https://github.com/Apocalypse990923-qshi)" +msgstr "[@Apocalypse990923-qshi](https://github.com/Apocalypse990923-qshi)" + +#: ../../source/community/contributors.md +msgid "" +"[71866d5](https://github.com/vllm-project/vllm-" +"ascend/commit/71866d531151b36a9af4db36ba8b3b74162f7028)" +msgstr "" +"[71866d5](https://github.com/vllm-project/vllm-" +"ascend/commit/71866d531151b36a9af4db36ba8b3b74162f7028)" + +#: ../../source/community/contributors.md +msgid "205" +msgstr "205" + +#: ../../source/community/contributors.md +msgid "[@herizhen](https://github.com/herizhen)" +msgstr "[@herizhen](https://github.com/herizhen)" + +#: ../../source/community/contributors.md +msgid "2025/11/10" +msgstr "2025年11月10日" + +#: ../../source/community/contributors.md +msgid "" +"[75c3f9a](https://github.com/vllm-project/vllm-" +"ascend/commit/75c3f9a7807daa3346685be88e4f06d6a5f362f0)" +msgstr "" +"[75c3f9a](https://github.com/vllm-project/vllm-" +"ascend/commit/75c3f9a7807daa3346685be88e4f06d6a5f362f0)" + +#: ../../source/community/contributors.md +msgid "204" +msgstr "204" + +#: ../../source/community/contributors.md +msgid "[@wangx700](https://github.com/wangx700)" +msgstr "[@wangx700](https://github.com/wangx700)" + +#: ../../source/community/contributors.md +msgid "2025/11/08" +msgstr "2025年11月08日" + +#: ../../source/community/contributors.md +msgid "" +"[55e37f5](https://github.com/vllm-project/vllm-" +"ascend/commit/55e37f50418f38861768556fdde9e86c2a22aef4)" +msgstr "" +"[55e37f5](https://github.com/vllm-project/vllm-" +"ascend/commit/55e37f50418f38861768556fdde9e86c2a22aef4)" + +#: ../../source/community/contributors.md +msgid "203" +msgstr "203" + +#: ../../source/community/contributors.md +msgid "[@Semmer2](https://github.com/Semmer2)" +msgstr "[@Semmer2](https://github.com/Semmer2)" + +#: ../../source/community/contributors.md +msgid "" +"[f984256](https://github.com/vllm-project/vllm-" +"ascend/commit/f9842560cbd5bb7b1106b3f47982adb479a91774)" +msgstr "" +"[f984256](https://github.com/vllm-project/vllm-" +"ascend/commit/f9842560cbd5bb7b1106b3f47982adb479a91774)" + +#: ../../source/community/contributors.md +msgid "202" +msgstr "202" + +#: ../../source/community/contributors.md +msgid "[@Liziqi-77](https://github.com/Liziqi-77)" +msgstr "[@Liziqi-77](https://github.com/Liziqi-77)" + +#: 
../../source/community/contributors.md +msgid "2025/11/06" +msgstr "2025年11月06日" + +#: ../../source/community/contributors.md +msgid "" +"[25b24c0](https://github.com/vllm-project/vllm-" +"ascend/commit/25b24c02ea86f0f8a673f8f332e800ed72abc083)" +msgstr "" +"[25b24c0](https://github.com/vllm-project/vllm-" +"ascend/commit/25b24c02ea86f0f8a673f8f332e800ed72abc083)" + +#: ../../source/community/contributors.md +msgid "201" +msgstr "201" + +#: ../../source/community/contributors.md +msgid "[@Pz1116](https://github.com/Pz1116)" +msgstr "[@Pz1116](https://github.com/Pz1116)" + +#: ../../source/community/contributors.md +msgid "2025/11/05" +msgstr "2025年11月05日" + +#: ../../source/community/contributors.md +msgid "" +"[e0c23cb](https://github.com/vllm-project/vllm-" +"ascend/commit/e0c23cb011e7ee6e5afce32ab0a0b9c73e825baa)" +msgstr "" +"[e0c23cb](https://github.com/vllm-project/vllm-" +"ascend/commit/e0c23cb011e7ee6e5afce32ab0a0b9c73e825baa)" + +#: ../../source/community/contributors.md +msgid "200" +msgstr "200" + +#: ../../source/community/contributors.md +msgid "[@MrZ20](https://github.com/MrZ20)" +msgstr "[@MrZ20](https://github.com/MrZ20)" + +#: ../../source/community/contributors.md +msgid "2025/11/04" +msgstr "2025年11月04日" + +#: ../../source/community/contributors.md +msgid "" +"[dc1a6cb](https://github.com/vllm-project/vllm-" +"ascend/commit/dc1a6cb5039b5ee1520fde8cf85b99f7ce33ec7b)" +msgstr "" +"[dc1a6cb](https://github.com/vllm-project/vllm-" +"ascend/commit/dc1a6cb5039b5ee1520fde8cf85b99f7ce33ec7b)" + +#: ../../source/community/contributors.md +msgid "199" +msgstr "199" + +#: ../../source/community/contributors.md +msgid "[@ForBetterCodeNine](https://github.com/ForBetterCodeNine)" +msgstr "[@ForBetterCodeNine](https://github.com/ForBetterCodeNine)" + +#: ../../source/community/contributors.md +msgid "2025/11/03" +msgstr "2025年11月03日" + +#: ../../source/community/contributors.md +msgid "" +"[49d7478](https://github.com/vllm-project/vllm-" +"ascend/commit/49d74785c440c964db1a1212c9b24f695f21a5a2)" +msgstr "" +"[49d7478](https://github.com/vllm-project/vllm-" +"ascend/commit/49d74785c440c964db1a1212c9b24f695f21a5a2)" + +#: ../../source/community/contributors.md +msgid "198" +msgstr "198" + +#: ../../source/community/contributors.md +msgid "[@Nagisa125](https://github.com/Nagisa125)" +msgstr "[@Nagisa125](https://github.com/Nagisa125)" + +#: ../../source/community/contributors.md +msgid "2025/10/31" +msgstr "2025年10月31日" + +#: ../../source/community/contributors.md +msgid "" +"[6764777](https://github.com/vllm-project/vllm-" +"ascend/commit/6764777f000f6ac5acfadbba3797ce966397e1d8)" +msgstr "" +"[6764777](https://github.com/vllm-project/vllm-" +"ascend/commit/6764777f000f6ac5acfadbba3797ce966397e1d8)" + +#: ../../source/community/contributors.md +msgid "197" +msgstr "197" + +#: ../../source/community/contributors.md +msgid "[@Liwx1014](https://github.com/Liwx1014)" +msgstr "[@Liwx1014](https://github.com/Liwx1014)" + +#: ../../source/community/contributors.md +msgid "2025/10/30" +msgstr "2025年10月30日" + +#: ../../source/community/contributors.md +msgid "" +"[eed1957](https://github.com/vllm-project/vllm-" +"ascend/commit/eed1957f03224bf800191390244c39ff5bb362d1)" +msgstr "" +"[eed1957](https://github.com/vllm-project/vllm-" +"ascend/commit/eed1957f03224bf800191390244c39ff5bb362d1)" + +#: ../../source/community/contributors.md +msgid "196" +msgstr "196" + +#: ../../source/community/contributors.md +msgid "[@Meihan-chen](https://github.com/Meihan-chen)" +msgstr 
"[@Meihan-chen](https://github.com/Meihan-chen)" + +#: ../../source/community/contributors.md +msgid "2025/10/29" +msgstr "2025年10月29日" + +#: ../../source/community/contributors.md +msgid "" +"[cba69e1](https://github.com/vllm-project/vllm-" +"ascend/commit/cba69e117eaa8c5d31fca64b01aecb22d3162860)" +msgstr "" +"[cba69e1](https://github.com/vllm-project/vllm-" +"ascend/commit/cba69e117eaa8c5d31fca64b01aecb22d3162860)" + +#: ../../source/community/contributors.md +msgid "195" +msgstr "195" + +#: ../../source/community/contributors.md +msgid "[@Levi-JQ](https://github.com/Levi-JQ)" +msgstr "[@Levi-JQ](https://github.com/Levi-JQ)" + +#: ../../source/community/contributors.md +msgid "2025/10/27" +msgstr "2025年10月27日" + +#: ../../source/community/contributors.md +msgid "" +"[d64bdd0](https://github.com/vllm-project/vllm-" +"ascend/commit/d64bdd06ae656048040c34c0ff3b909293b5113b)" +msgstr "" +"[d64bdd0](https://github.com/vllm-project/vllm-" +"ascend/commit/d64bdd06ae656048040c34c0ff3b909293b5113b)" + +#: ../../source/community/contributors.md +msgid "194" +msgstr "194" + +#: ../../source/community/contributors.md +msgid "[@QilaiZhang](https://github.com/QilaiZhang)" +msgstr "[@QilaiZhang](https://github.com/QilaiZhang)" + +#: ../../source/community/contributors.md +msgid "2025/10/25" +msgstr "2025年10月25日" + +#: ../../source/community/contributors.md +msgid "" +"[d30bb95](https://github.com/vllm-project/vllm-" +"ascend/commit/d30bb95b9040475495dc01f5c9e38576905be621)" +msgstr "" +"[d30bb95](https://github.com/vllm-project/vllm-" +"ascend/commit/d30bb95b9040475495dc01f5c9e38576905be621)" + +#: ../../source/community/contributors.md +msgid "193" +msgstr "193" + +#: ../../source/community/contributors.md +msgid "[@gcanlin](https://github.com/gcanlin)" +msgstr "[@gcanlin](https://github.com/gcanlin)" + +#: ../../source/community/contributors.md +msgid "" +"[8295136](https://github.com/vllm-project/vllm-" +"ascend/commit/829513657518c0077622b1904f49d79657b2537a)" +msgstr "" +"[8295136](https://github.com/vllm-project/vllm-" +"ascend/commit/829513657518c0077622b1904f49d79657b2537a)" + +#: ../../source/community/contributors.md +msgid "192" +msgstr "192" + +#: ../../source/community/contributors.md +msgid "[@ck-hw-1018](https://github.com/ck-hw-1018)" +msgstr "[@ck-hw-1018](https://github.com/ck-hw-1018)" + +#: ../../source/community/contributors.md +msgid "" +"[7572939](https://github.com/vllm-project/vllm-" +"ascend/commit/7572939b94e9c2215cf3ebd30bd6f61c4d1b04f6)" +msgstr "" +"[7572939](https://github.com/vllm-project/vllm-" +"ascend/commit/7572939b94e9c2215cf3ebd30bd6f61c4d1b04f6)" + +#: ../../source/community/contributors.md +msgid "191" +msgstr "191" + +#: ../../source/community/contributors.md +msgid "[@yenuo26](https://github.com/yenuo26)" +msgstr "[@yenuo26](https://github.com/yenuo26)" + +#: ../../source/community/contributors.md +msgid "2025/10/24" +msgstr "2025年10月24日" + +#: ../../source/community/contributors.md +msgid "" +"[d301c56](https://github.com/vllm-project/vllm-" +"ascend/commit/d301c56d1aff5d2f408e54285df5ff36fd28d193)" +msgstr "" +"[d301c56](https://github.com/vllm-project/vllm-" +"ascend/commit/d301c56d1aff5d2f408e54285df5ff36fd28d193)" + +#: ../../source/community/contributors.md +msgid "190" +msgstr "190" + +#: ../../source/community/contributors.md +msgid "[@luoxiaolin712](https://github.com/luoxiaolin712)" +msgstr "[@luoxiaolin712](https://github.com/luoxiaolin712)" + +#: ../../source/community/contributors.md +msgid "" +"[59bb16b](https://github.com/vllm-project/vllm-" 
+"ascend/commit/59bb16b75c492aa636fec53f1d32680839e7d1e7)" +msgstr "" +"[59bb16b](https://github.com/vllm-project/vllm-" +"ascend/commit/59bb16b75c492aa636fec53f1d32680839e7d1e7)" + +#: ../../source/community/contributors.md +msgid "189" +msgstr "189" + +#: ../../source/community/contributors.md +msgid "[@lio1226](https://github.com/lio1226)" +msgstr "[@lio1226](https://github.com/lio1226)" + +#: ../../source/community/contributors.md +msgid "" +"[cd58a64](https://github.com/vllm-project/vllm-" +"ascend/commit/cd58a643c598622fdf5a853764f851a5074d0328)" +msgstr "" +"[cd58a64](https://github.com/vllm-project/vllm-" +"ascend/commit/cd58a643c598622fdf5a853764f851a5074d0328)" + +#: ../../source/community/contributors.md +msgid "188" +msgstr "188" + +#: ../../source/community/contributors.md +msgid "[@yzy1996](https://github.com/yzy1996)" +msgstr "[@yzy1996](https://github.com/yzy1996)" + +#: ../../source/community/contributors.md +msgid "2025/10/23" +msgstr "2025/10/23" + +#: ../../source/community/contributors.md +msgid "" +"[f06a6ca](https://github.com/vllm-project/vllm-" +"ascend/commit/f06a6cad1b0c6b142d4a9364c65b4b920b251540)" +msgstr "" +"[f06a6ca](https://github.com/vllm-project/vllm-" +"ascend/commit/f06a6cad1b0c6b142d4a9364c65b4b920b251540)" + +#: ../../source/community/contributors.md +msgid "187" +msgstr "187" + +#: ../../source/community/contributors.md +msgid "[@wlf-darkmatter](https://github.com/wlf-darkmatter)" +msgstr "[@wlf-darkmatter](https://github.com/wlf-darkmatter)" + +#: ../../source/community/contributors.md +msgid "" +"[097173e](https://github.com/vllm-project/vllm-" +"ascend/commit/097173e50fc9efe2bf4e46bf7d59b8f5e06f2679)" +msgstr "" +"[097173e](https://github.com/vllm-project/vllm-" +"ascend/commit/097173e50fc9efe2bf4e46bf7d59b8f5e06f2679)" + +#: ../../source/community/contributors.md +msgid "186" +msgstr "186" + +#: ../../source/community/contributors.md +msgid "[@HuaJiaHeng](https://github.com/HuaJiaHeng)" +msgstr "[@HuaJiaHeng](https://github.com/HuaJiaHeng)" + +#: ../../source/community/contributors.md +msgid "" +"[062257f](https://github.com/vllm-project/vllm-" +"ascend/commit/062257f624486d15315013b624ce71e1a388ee20)" +msgstr "" +"[062257f](https://github.com/vllm-project/vllm-" +"ascend/commit/062257f624486d15315013b624ce71e1a388ee20)" + +#: ../../source/community/contributors.md +msgid "185" +msgstr "185" + +#: ../../source/community/contributors.md +msgid "[@destinysky](https://github.com/destinysky)" +msgstr "[@destinysky](https://github.com/destinysky)" + +#: ../../source/community/contributors.md +msgid "" +"[427b17e](https://github.com/vllm-project/vllm-" +"ascend/commit/427b17e2da1d6d967e7581b40b6bd169cdb324c8)" +msgstr "" +"[427b17e](https://github.com/vllm-project/vllm-" +"ascend/commit/427b17e2da1d6d967e7581b40b6bd169cdb324c8)" + +#: ../../source/community/contributors.md +msgid "184" +msgstr "184" + +#: ../../source/community/contributors.md +msgid "[@KyrieDrewWang](https://github.com/KyrieDrewWang)" +msgstr "[@KyrieDrewWang](https://github.com/KyrieDrewWang)" + +#: ../../source/community/contributors.md +msgid "2025/10/22" +msgstr "2025/10/22" + +#: ../../source/community/contributors.md +msgid "" +"[60e2be1](https://github.com/vllm-project/vllm-" +"ascend/commit/60e2be1b366975f7183c38cd158da811969b9230)" +msgstr "" +"[60e2be1](https://github.com/vllm-project/vllm-" +"ascend/commit/60e2be1b366975f7183c38cd158da811969b9230)" + +#: ../../source/community/contributors.md +msgid "183" +msgstr "183" + +#: ../../source/community/contributors.md +msgid 
"[@HF-001](https://github.com/HF-001)" +msgstr "[@HF-001](https://github.com/HF-001)" + +#: ../../source/community/contributors.md +msgid "" +"[bc30874](https://github.com/vllm-project/vllm-" +"ascend/commit/bc30874f8b6212ff8eb45b2546c360660c49c9c1)" +msgstr "" +"[bc30874](https://github.com/vllm-project/vllm-" +"ascend/commit/bc30874f8b6212ff8eb45b2546c360660c49c9c1)" + +#: ../../source/community/contributors.md +msgid "182" +msgstr "182" + +#: ../../source/community/contributors.md +msgid "[@drslark](https://github.com/drslark)" +msgstr "[@drslark](https://github.com/drslark)" + +#: ../../source/community/contributors.md +msgid "2025/10/21" +msgstr "2025/10/21" + +#: ../../source/community/contributors.md +msgid "" +"[534f32d](https://github.com/vllm-project/vllm-" +"ascend/commit/534f32d27c0cc48731fbdae3701fbb6c3bb4332a)" +msgstr "" +"[534f32d](https://github.com/vllm-project/vllm-" +"ascend/commit/534f32d27c0cc48731fbdae3701fbb6c3bb4332a)" + +#: ../../source/community/contributors.md +msgid "181" +msgstr "181" + +#: ../../source/community/contributors.md +msgid "[@Anionex](https://github.com/Anionex)" +msgstr "[@Anionex](https://github.com/Anionex)" + +#: ../../source/community/contributors.md +msgid "" +"[5f8b169](https://github.com/vllm-project/vllm-" +"ascend/commit/5f8b1699ae35bb8c046d8b385d215cd208bc3fcb)" +msgstr "" +"[5f8b169](https://github.com/vllm-project/vllm-" +"ascend/commit/5f8b1699ae35bb8c046d8b385d215cd208bc3fcb)" + +#: ../../source/community/contributors.md +msgid "180" +msgstr "180" + +#: ../../source/community/contributors.md +msgid "[@leijie-ww](https://github.com/leijie-ww)" +msgstr "[@leijie-ww](https://github.com/leijie-ww)" + +#: ../../source/community/contributors.md +msgid "2025/10/20" +msgstr "2025/10/20" + +#: ../../source/community/contributors.md +msgid "" +"[6b68579](https://github.com/vllm-project/vllm-" +"ascend/commit/6b6857929d8397fbec1cd132eb4ea5d13712497c)" +msgstr "" +"[6b68579](https://github.com/vllm-project/vllm-" +"ascend/commit/6b6857929d8397fbec1cd132eb4ea5d13712497c)" + +#: ../../source/community/contributors.md +msgid "179" +msgstr "179" + +#: ../../source/community/contributors.md +msgid "[@ZYang6263](https://github.com/ZYang6263)" +msgstr "[@ZYang6263](https://github.com/ZYang6263)" + +#: ../../source/community/contributors.md +msgid "2025/10/19" +msgstr "2025/10/19" + +#: ../../source/community/contributors.md +msgid "" +"[1e78ecb](https://github.com/vllm-project/vllm-" +"ascend/commit/1e78ecbad626968c7e65fab5596c90ea79e7454d)" +msgstr "" +"[1e78ecb](https://github.com/vllm-project/vllm-" +"ascend/commit/1e78ecbad626968c7e65fab5596c90ea79e7454d)" + +#: ../../source/community/contributors.md +msgid "178" +msgstr "178" + +#: ../../source/community/contributors.md +msgid "[@yechao237](https://github.com/yechao237)" +msgstr "[@yechao237](https://github.com/yechao237)" + +#: ../../source/community/contributors.md +msgid "2025/10/18" +msgstr "2025/10/18" + +#: ../../source/community/contributors.md +msgid "" +"[4750d45](https://github.com/vllm-project/vllm-" +"ascend/commit/4750d45d86632b7085ab9f0b57070cd17aa9d108)" +msgstr "" +"[4750d45](https://github.com/vllm-project/vllm-" +"ascend/commit/4750d45d86632b7085ab9f0b57070cd17aa9d108)" + +#: ../../source/community/contributors.md +msgid "177" +msgstr "177" + +#: ../../source/community/contributors.md +msgid "[@Shirley125](https://github.com/Shirley125)" +msgstr "[@Shirley125](https://github.com/Shirley125)" + +#: ../../source/community/contributors.md +msgid "" 
+"[b4233a2](https://github.com/vllm-project/vllm-" +"ascend/commit/b4233a2ec35a91beca4b8c8402ea7cf6394b62c9)" +msgstr "" +"[b4233a2](https://github.com/vllm-project/vllm-" +"ascend/commit/b4233a2ec35a91beca4b8c8402ea7cf6394b62c9)" + +#: ../../source/community/contributors.md +msgid "176" +msgstr "176" + +#: ../../source/community/contributors.md +msgid "[@DreamerLeader](https://github.com/DreamerLeader)" +msgstr "[@DreamerLeader](https://github.com/DreamerLeader)" + +#: ../../source/community/contributors.md +msgid "2025/10/15" +msgstr "2025/10/15" + +#: ../../source/community/contributors.md +msgid "" +"[aa61547](https://github.com/vllm-project/vllm-" +"ascend/commit/aa6154703aeeb259611326a3fba095b6abc5b791)" +msgstr "" +"[aa61547](https://github.com/vllm-project/vllm-" +"ascend/commit/aa6154703aeeb259611326a3fba095b6abc5b791)" + +#: ../../source/community/contributors.md +msgid "175" +msgstr "175" + +#: ../../source/community/contributors.md +msgid "[@yuzhup](https://github.com/yuzhup)" +msgstr "[@yuzhup](https://github.com/yuzhup)" + +#: ../../source/community/contributors.md +msgid "2025/10/14" +msgstr "2025/10/14" + +#: ../../source/community/contributors.md +msgid "" +"[7877723](https://github.com/vllm-project/vllm-" +"ascend/commit/78777237a9ae95af88ce6068b000635f96aafe25)" +msgstr "" +"[7877723](https://github.com/vllm-project/vllm-" +"ascend/commit/78777237a9ae95af88ce6068b000635f96aafe25)" + +#: ../../source/community/contributors.md +msgid "174" +msgstr "174" + +#: ../../source/community/contributors.md +msgid "[@menogrey](https://github.com/menogrey)" +msgstr "[@menogrey](https://github.com/menogrey)" + +#: ../../source/community/contributors.md +msgid "" +"[657c08c](https://github.com/vllm-project/vllm-" +"ascend/commit/657c08cfb266c63946ea61a7be377b6de55d20f3)" +msgstr "" +"[657c08c](https://github.com/vllm-project/vllm-" +"ascend/commit/657c08cfb266c63946ea61a7be377b6de55d20f3)" + +#: ../../source/community/contributors.md +msgid "173" +msgstr "173" + +#: ../../source/community/contributors.md +msgid "[@elilzhu](https://github.com/elilzhu)" +msgstr "[@elilzhu](https://github.com/elilzhu)" + +#: ../../source/community/contributors.md +msgid "" +"[5c45c22](https://github.com/vllm-project/vllm-" +"ascend/commit/5c45c227dc254591f4a9345e67a84a0d5fe1c345)" +msgstr "" +"[5c45c22](https://github.com/vllm-project/vllm-" +"ascend/commit/5c45c227dc254591f4a9345e67a84a0d5fe1c345)" + +#: ../../source/community/contributors.md +msgid "172" +msgstr "172" + +#: ../../source/community/contributors.md +msgid "[@dsxsteven](https://github.com/dsxsteven)" +msgstr "[@dsxsteven](https://github.com/dsxsteven)" + +#: ../../source/community/contributors.md +msgid "2025/10/13" +msgstr "2025/10/13" + +#: ../../source/community/contributors.md +msgid "" +"[847d12a](https://github.com/vllm-project/vllm-" +"ascend/commit/847d12a389217e4cbcc5fff70abd72d9b15ad5c4)" +msgstr "" +"[847d12a](https://github.com/vllm-project/vllm-" +"ascend/commit/847d12a389217e4cbcc5fff70abd72d9b15ad5c4)" + +#: ../../source/community/contributors.md +msgid "171" +msgstr "171" + +#: ../../source/community/contributors.md +msgid "[@kiscad](https://github.com/kiscad)" +msgstr "[@kiscad](https://github.com/kiscad)" + +#: ../../source/community/contributors.md +msgid "2025/10/12" +msgstr "2025/10/12" + +#: ../../source/community/contributors.md +msgid "" +"[bcc313e](https://github.com/vllm-project/vllm-" +"ascend/commit/bcc313e8f2bfe9a53ed88dffffd93632db52a4ba)" +msgstr "" +"[bcc313e](https://github.com/vllm-project/vllm-" 
+"ascend/commit/bcc313e8f2bfe9a53ed88dffffd93632db52a4ba)" + +#: ../../source/community/contributors.md +msgid "170" +msgstr "170" + +#: ../../source/community/contributors.md +msgid "[@jiangyunfan1](https://github.com/jiangyunfan1)" +msgstr "[@jiangyunfan1](https://github.com/jiangyunfan1)" + +#: ../../source/community/contributors.md +msgid "" +"[d05d29f](https://github.com/vllm-project/vllm-" +"ascend/commit/d05d29ff0e7c5d261dbdee5e98ff0ca95135217b)" +msgstr "" +"[d05d29f](https://github.com/vllm-project/vllm-" +"ascend/commit/d05d29ff0e7c5d261dbdee5e98ff0ca95135217b)" + +#: ../../source/community/contributors.md +msgid "169" +msgstr "169" + +#: ../../source/community/contributors.md +msgid "[@huangdong2022](https://github.com/huangdong2022)" +msgstr "[@huangdong2022](https://github.com/huangdong2022)" + +#: ../../source/community/contributors.md +msgid "2025/10/09" +msgstr "2025/10/09" + +#: ../../source/community/contributors.md +msgid "" +"[23db56a](https://github.com/vllm-project/vllm-" +"ascend/commit/23db56a34063c5285f7d34e80e45c1888b877bd3)" +msgstr "" +"[23db56a](https://github.com/vllm-project/vllm-" +"ascend/commit/23db56a34063c5285f7d34e80e45c1888b877bd3)" + +#: ../../source/community/contributors.md +msgid "168" +msgstr "168" + +#: ../../source/community/contributors.md +msgid "[@zhenwenqi2024](https://github.com/zhenwenqi2024)" +msgstr "[@zhenwenqi2024](https://github.com/zhenwenqi2024)" + +#: ../../source/community/contributors.md +msgid "2025/09/30" +msgstr "2025/09/30" + +#: ../../source/community/contributors.md +msgid "" +"[9dd8621](https://github.com/vllm-project/vllm-" +"ascend/commit/9dd8621212a26b52e98de58488c2a6cd34cde76e)" +msgstr "" +"[9dd8621](https://github.com/vllm-project/vllm-" +"ascend/commit/9dd8621212a26b52e98de58488c2a6cd34cde76e)" + +#: ../../source/community/contributors.md +msgid "167" +msgstr "167" + +#: ../../source/community/contributors.md +msgid "[@zhangsicheng5](https://github.com/zhangsicheng5)" +msgstr "[@zhangsicheng5](https://github.com/zhangsicheng5)" + +#: ../../source/community/contributors.md +msgid "" +"[657f065](https://github.com/vllm-project/vllm-" +"ascend/commit/657f065f4247689b95c2edea00696114de5b1c48)" +msgstr "" +"[657f065](https://github.com/vllm-project/vllm-" +"ascend/commit/657f065f4247689b95c2edea00696114de5b1c48)" + +#: ../../source/community/contributors.md +msgid "166" +msgstr "166" + +#: ../../source/community/contributors.md +msgid "[@kkrazy](https://github.com/kkrazy)" +msgstr "[@kkrazy](https://github.com/kkrazy)" + +#: ../../source/community/contributors.md +msgid "2025/09/29" +msgstr "2025/09/29" + +#: ../../source/community/contributors.md +msgid "" +"[f62c866](https://github.com/vllm-project/vllm-" +"ascend/commit/f62c8660fd80856ccfc4c57fb9ed5b05df8b5654)" +msgstr "" +"[f62c866](https://github.com/vllm-project/vllm-" +"ascend/commit/f62c8660fd80856ccfc4c57fb9ed5b05df8b5654)" + +#: ../../source/community/contributors.md +msgid "165" +msgstr "165" + +#: ../../source/community/contributors.md +msgid "[@socrahow](https://github.com/socrahow)" +msgstr "[@socrahow](https://github.com/socrahow)" + +#: ../../source/community/contributors.md +msgid "2025/09/28" +msgstr "2025/09/28" + +#: ../../source/community/contributors.md +msgid "" +"[c3fee66](https://github.com/vllm-project/vllm-" +"ascend/commit/c3fee66806f252476796389ea73d13a8aca60146)" +msgstr "" +"[c3fee66](https://github.com/vllm-project/vllm-" +"ascend/commit/c3fee66806f252476796389ea73d13a8aca60146)" + +#: ../../source/community/contributors.md +msgid "164" 
+msgstr "164" + +#: ../../source/community/contributors.md +msgid "[@slippersss](https://github.com/slippersss)" +msgstr "[@slippersss](https://github.com/slippersss)" + +#: ../../source/community/contributors.md +msgid "" +"[a86ece5](https://github.com/vllm-project/vllm-" +"ascend/commit/a86ece5e399db9aa9d7186ab7e51bc0e0dad4134)" +msgstr "" +"[a86ece5](https://github.com/vllm-project/vllm-" +"ascend/commit/a86ece5e399db9aa9d7186ab7e51bc0e0dad4134)" + +#: ../../source/community/contributors.md +msgid "163" +msgstr "163" + +#: ../../source/community/contributors.md +msgid "[@florenceCH](https://github.com/florenceCH)" +msgstr "[@florenceCH](https://github.com/florenceCH)" + +#: ../../source/community/contributors.md +msgid "2025/09/26" +msgstr "2025/09/26" + +#: ../../source/community/contributors.md +msgid "" +"[14497b7](https://github.com/vllm-project/vllm-" +"ascend/commit/14497b748d778f95328f55a957e997ad80a492da)" +msgstr "" +"[14497b7](https://github.com/vllm-project/vllm-" +"ascend/commit/14497b748d778f95328f55a957e997ad80a492da)" + +#: ../../source/community/contributors.md +msgid "162" +msgstr "162" + +#: ../../source/community/contributors.md +msgid "[@mfyCn-1204](https://github.com/mfyCn-1204)" +msgstr "[@mfyCn-1204](https://github.com/mfyCn-1204)" + +#: ../../source/community/contributors.md +msgid "2025/09/25" +msgstr "2025/09/25" + +#: ../../source/community/contributors.md +msgid "" +"[33c118c](https://github.com/vllm-project/vllm-" +"ascend/commit/33c118c80e70cec64c9369b7ba4088c61c44bd31)" +msgstr "" +"[33c118c](https://github.com/vllm-project/vllm-" +"ascend/commit/33c118c80e70cec64c9369b7ba4088c61c44bd31)" + +#: ../../source/community/contributors.md +msgid "161" +msgstr "161" + +#: ../../source/community/contributors.md +msgid "[@dragondream-chen](https://github.com/dragondream-chen)" +msgstr "[@dragondream-chen](https://github.com/dragondream-chen)" + +#: ../../source/community/contributors.md +msgid "" +"[07f4710](https://github.com/vllm-project/vllm-" +"ascend/commit/07f4710216da1d61c521fb3bdf4ba90ea1794474)" +msgstr "" +"[07f4710](https://github.com/vllm-project/vllm-" +"ascend/commit/07f4710216da1d61c521fb3bdf4ba90ea1794474)" + +#: ../../source/community/contributors.md +msgid "160" +msgstr "160" + +#: ../../source/community/contributors.md +msgid "[@zzhx1](https://github.com/zzhx1)" +msgstr "[@zzhx1](https://github.com/zzhx1)" + +#: ../../source/community/contributors.md +msgid "2025/09/24" +msgstr "2025/09/24" + +#: ../../source/community/contributors.md +msgid "" +"[4ee58e2](https://github.com/vllm-project/vllm-" +"ascend/commit/4ee58e213b421ece745dba6e94967e6f557263ce)" +msgstr "" +"[4ee58e2](https://github.com/vllm-project/vllm-" +"ascend/commit/4ee58e213b421ece745dba6e94967e6f557263ce)" + +#: ../../source/community/contributors.md +msgid "159" +msgstr "159" + +#: ../../source/community/contributors.md +msgid "[@Csrayz](https://github.com/Csrayz)" +msgstr "[@Csrayz](https://github.com/Csrayz)" + +#: ../../source/community/contributors.md +msgid "" +"[80524f5](https://github.com/vllm-project/vllm-" +"ascend/commit/80524f571152e978f6e2808e504e1aa8b246a2c1)" +msgstr "" +"[80524f5](https://github.com/vllm-project/vllm-" +"ascend/commit/80524f571152e978f6e2808e504e1aa8b246a2c1)" + +#: ../../source/community/contributors.md +msgid "158" +msgstr "158" + +#: ../../source/community/contributors.md +msgid "[@clrs97](https://github.com/clrs97)" +msgstr "[@clrs97](https://github.com/clrs97)" + +#: ../../source/community/contributors.md +msgid "" 
+"[cd1ffbb](https://github.com/vllm-project/vllm-" +"ascend/commit/cd1ffbb6cd88a3f265027424bd3cca74d1efb1ea)" +msgstr "" +"[cd1ffbb](https://github.com/vllm-project/vllm-" +"ascend/commit/cd1ffbb6cd88a3f265027424bd3cca74d1efb1ea)" + +#: ../../source/community/contributors.md +msgid "157" +msgstr "157" + +#: ../../source/community/contributors.md +msgid "[@Clorist33](https://github.com/Clorist33)" +msgstr "[@Clorist33](https://github.com/Clorist33)" + +#: ../../source/community/contributors.md +msgid "" +"[302494c](https://github.com/vllm-project/vllm-" +"ascend/commit/302494c1febfb648ac2a5001390bbd9b59db6bc2)" +msgstr "" +"[302494c](https://github.com/vllm-project/vllm-" +"ascend/commit/302494c1febfb648ac2a5001390bbd9b59db6bc2)" + +#: ../../source/community/contributors.md +msgid "156" +msgstr "156" + +#: ../../source/community/contributors.md +msgid "[@booker123456](https://github.com/booker123456)" +msgstr "[@booker123456](https://github.com/booker123456)" + +#: ../../source/community/contributors.md +msgid "" +"[c4b976a](https://github.com/vllm-project/vllm-" +"ascend/commit/c4b976af1a6459a82f7556c53dc98c850dd3e3cd)" +msgstr "" +"[c4b976a](https://github.com/vllm-project/vllm-" +"ascend/commit/c4b976af1a6459a82f7556c53dc98c850dd3e3cd)" + +#: ../../source/community/contributors.md +msgid "155" +msgstr "155" + +#: ../../source/community/contributors.md +msgid "[@Mercykid-bash](https://github.com/Mercykid-bash)" +msgstr "[@Mercykid-bash](https://github.com/Mercykid-bash)" + +#: ../../source/community/contributors.md +msgid "2025/09/23" +msgstr "2025/09/23" + +#: ../../source/community/contributors.md +msgid "" +"[29c173a](https://github.com/vllm-project/vllm-" +"ascend/commit/29c173ab48001f3d26805db2d833be712687fb1a)" +msgstr "" +"[29c173a](https://github.com/vllm-project/vllm-" +"ascend/commit/29c173ab48001f3d26805db2d833be712687fb1a)" + +#: ../../source/community/contributors.md +msgid "154" +msgstr "154" + +#: ../../source/community/contributors.md +msgid "[@MaoJianwei](https://github.com/MaoJianwei)" +msgstr "[@MaoJianwei](https://github.com/MaoJianwei)" + +#: ../../source/community/contributors.md +msgid "" +"[d586255](https://github.com/vllm-project/vllm-" +"ascend/commit/d586255678d974d74b1fe798838594c0e948d6b6)" +msgstr "" +"[d586255](https://github.com/vllm-project/vllm-" +"ascend/commit/d586255678d974d74b1fe798838594c0e948d6b6)" + +#: ../../source/community/contributors.md +msgid "153" +msgstr "153" + +#: ../../source/community/contributors.md +msgid "[@Lucaskabela](https://github.com/Lucaskabela)" +msgstr "[@Lucaskabela](https://github.com/Lucaskabela)" + +#: ../../source/community/contributors.md +msgid "2025/09/20" +msgstr "2025/09/20" + +#: ../../source/community/contributors.md +msgid "" +"[53ecd89](https://github.com/vllm-project/vllm-" +"ascend/commit/53ecd89e8ff405302be040a76effa8c012cbaaeb)" +msgstr "" +"[53ecd89](https://github.com/vllm-project/vllm-" +"ascend/commit/53ecd89e8ff405302be040a76effa8c012cbaaeb)" + +#: ../../source/community/contributors.md +msgid "152" +msgstr "152" + +#: ../../source/community/contributors.md +msgid "[@jesse996](https://github.com/jesse996)" +msgstr "[@jesse996](https://github.com/jesse996)" + +#: ../../source/community/contributors.md +msgid "2025/09/19" +msgstr "2025/09/19" + +#: ../../source/community/contributors.md +msgid "" +"[833cd1b](https://github.com/vllm-project/vllm-" +"ascend/commit/833cd1b698f3d467bb0a6a60cbf20ebc5535f5c9)" +msgstr "" +"[833cd1b](https://github.com/vllm-project/vllm-" 
+"ascend/commit/833cd1b698f3d467bb0a6a60cbf20ebc5535f5c9)" + +#: ../../source/community/contributors.md +msgid "151" +msgstr "151" + +#: ../../source/community/contributors.md +msgid "[@offline893](https://github.com/offline893)" +msgstr "[@offline893](https://github.com/offline893)" + +#: ../../source/community/contributors.md +msgid "2025/09/17" +msgstr "2025/09/17" + +#: ../../source/community/contributors.md +msgid "" +"[76844ee](https://github.com/vllm-project/vllm-" +"ascend/commit/76844eec78a23f482a4e0dfe9684898a6ef35fb2)" +msgstr "" +"[76844ee](https://github.com/vllm-project/vllm-" +"ascend/commit/76844eec78a23f482a4e0dfe9684898a6ef35fb2)" + +#: ../../source/community/contributors.md +msgid "150" +msgstr "150" + +#: ../../source/community/contributors.md +msgid "[@1Fire4](https://github.com/1Fire4)" +msgstr "[@1Fire4](https://github.com/1Fire4)" + +#: ../../source/community/contributors.md +msgid "" +"[1f6465c](https://github.com/vllm-project/vllm-" +"ascend/commit/1f6465c399d6e699f88e28419c106573bd6c44f0)" +msgstr "" +"[1f6465c](https://github.com/vllm-project/vllm-" +"ascend/commit/1f6465c399d6e699f88e28419c106573bd6c44f0)" + +#: ../../source/community/contributors.md +msgid "149" +msgstr "149" + +#: ../../source/community/contributors.md +msgid "[@invalid-email-address](https://github.com/invalid-email-address)" +msgstr "[@invalid-email-address](https://github.com/invalid-email-address)" + +#: ../../source/community/contributors.md +msgid "2025/09/14" +msgstr "2025/09/14" + +#: ../../source/community/contributors.md +msgid "" +"[c9da5de](https://github.com/vllm-project/vllm-" +"ascend/commit/c9da5dea5c271187c0119848ede9c0518a0c41b2)" +msgstr "" +"[c9da5de](https://github.com/vllm-project/vllm-" +"ascend/commit/c9da5dea5c271187c0119848ede9c0518a0c41b2)" + +#: ../../source/community/contributors.md +msgid "148" +msgstr "148" + +#: ../../source/community/contributors.md +msgid "[@nwpu-zxr](https://github.com/nwpu-zxr)" +msgstr "[@nwpu-zxr](https://github.com/nwpu-zxr)" + +#: ../../source/community/contributors.md +msgid "2025/09/13" +msgstr "2025/09/13" + +#: ../../source/community/contributors.md +msgid "" +"[0a27705](https://github.com/vllm-project/vllm-" +"ascend/commit/0a27705917e64993a8a76198ac6e30980578fe60)" +msgstr "" +"[0a27705](https://github.com/vllm-project/vllm-" +"ascend/commit/0a27705917e64993a8a76198ac6e30980578fe60)" + +#: ../../source/community/contributors.md +msgid "147" +msgstr "147" + +#: ../../source/community/contributors.md +msgid "[@zhaozx-cn](https://github.com/zhaozx-cn)" +msgstr "[@zhaozx-cn](https://github.com/zhaozx-cn)" + +#: ../../source/community/contributors.md +msgid "2025/09/11" +msgstr "2025/09/11" + +#: ../../source/community/contributors.md +msgid "" +"[b9a0a75](https://github.com/vllm-project/vllm-" +"ascend/commit/b9a0a75c783571caf22129612fb3338272d1782c)" +msgstr "" +"[b9a0a75](https://github.com/vllm-project/vllm-" +"ascend/commit/b9a0a75c783571caf22129612fb3338272d1782c)" + +#: ../../source/community/contributors.md +msgid "146" +msgstr "146" + +#: ../../source/community/contributors.md +msgid "[@wyu0-0](https://github.com/wyu0-0)" +msgstr "[@wyu0-0](https://github.com/wyu0-0)" + +#: ../../source/community/contributors.md +msgid "" +"[eab3635](https://github.com/vllm-project/vllm-" +"ascend/commit/eab3635850ba351af81d76a7b4b3db46ffb7f697)" +msgstr "" +"[eab3635](https://github.com/vllm-project/vllm-" +"ascend/commit/eab3635850ba351af81d76a7b4b3db46ffb7f697)" + +#: ../../source/community/contributors.md +msgid "145" +msgstr "145" + +#: 
../../source/community/contributors.md +msgid "[@wuweiqiang24](https://github.com/wuweiqiang24)" +msgstr "[@wuweiqiang24](https://github.com/wuweiqiang24)" + +#: ../../source/community/contributors.md +msgid "" +"[9615dea](https://github.com/vllm-project/vllm-" +"ascend/commit/9615dea3a71df8ecd2c591f284d9615140dce68a)" +msgstr "" +"[9615dea](https://github.com/vllm-project/vllm-" +"ascend/commit/9615dea3a71df8ecd2c591f284d9615140dce68a)" + +#: ../../source/community/contributors.md +msgid "144" +msgstr "144" + +#: ../../source/community/contributors.md +msgid "[@wenba0](https://github.com/wenba0)" +msgstr "[@wenba0](https://github.com/wenba0)" + +#: ../../source/community/contributors.md +msgid "" +"[bd3dede](https://github.com/vllm-project/vllm-" +"ascend/commit/bd3dedea6123c9c8c19fe83b6e05716f63b1285d)" +msgstr "" +"[bd3dede](https://github.com/vllm-project/vllm-" +"ascend/commit/bd3dedea6123c9c8c19fe83b6e05716f63b1285d)" + +#: ../../source/community/contributors.md +msgid "143" +msgstr "143" + +#: ../../source/community/contributors.md +msgid "[@anon189Ty](https://github.com/anon189Ty)" +msgstr "[@anon189Ty](https://github.com/anon189Ty)" + +#: ../../source/community/contributors.md +msgid "" +"[7b2ecc1](https://github.com/vllm-project/vllm-" +"ascend/commit/7b2ecc1e9a64aeda78e2137aa06abdbf2890c000)" +msgstr "" +"[7b2ecc1](https://github.com/vllm-project/vllm-" +"ascend/commit/7b2ecc1e9a64aeda78e2137aa06abdbf2890c000)" + +#: ../../source/community/contributors.md +msgid "142" +msgstr "142" + +#: ../../source/community/contributors.md +msgid "[@fffrog](https://github.com/fffrog)" +msgstr "[@fffrog](https://github.com/fffrog)" + +#: ../../source/community/contributors.md +msgid "2025/09/10" +msgstr "2025/09/10" + +#: ../../source/community/contributors.md +msgid "" +"[b7ee3fd](https://github.com/vllm-project/vllm-" +"ascend/commit/b7ee3fdad30d00d9aaa31be04315838c7e2c24ac)" +msgstr "" +"[b7ee3fd](https://github.com/vllm-project/vllm-" +"ascend/commit/b7ee3fdad30d00d9aaa31be04315838c7e2c24ac)" + +#: ../../source/community/contributors.md +msgid "141" +msgstr "141" + +#: ../../source/community/contributors.md +msgid "[@fan2956](https://github.com/fan2956)" +msgstr "[@fan2956](https://github.com/fan2956)" + +#: ../../source/community/contributors.md +msgid "" +"[bfc9cdc](https://github.com/vllm-project/vllm-" +"ascend/commit/bfc9cdc1e3a7c4a148ae3e19b9380443e2f78550)" +msgstr "" +"[bfc9cdc](https://github.com/vllm-project/vllm-" +"ascend/commit/bfc9cdc1e3a7c4a148ae3e19b9380443e2f78550)" + +#: ../../source/community/contributors.md +msgid "140" +msgstr "140" + +#: ../../source/community/contributors.md +msgid "2025/09/08" +msgstr "2025/09/08" + +#: ../../source/community/contributors.md +msgid "" +"[d3c3538](https://github.com/vllm-project/vllm-" +"ascend/commit/d3c3538ddc67ba8f4873637e2bc1052f9eb09e93)" +msgstr "" +"[d3c3538](https://github.com/vllm-project/vllm-" +"ascend/commit/d3c3538ddc67ba8f4873637e2bc1052f9eb09e93)" + +#: ../../source/community/contributors.md +msgid "139" +msgstr "139" + +#: ../../source/community/contributors.md +msgid "[@marcobarlo](https://github.com/marcobarlo)" +msgstr "[@marcobarlo](https://github.com/marcobarlo)" + +#: ../../source/community/contributors.md +msgid "" +"[6666e52](https://github.com/vllm-project/vllm-" +"ascend/commit/6666e5265d40ecafc3cb377233fee840d7fe553b)" +msgstr "" +"[6666e52](https://github.com/vllm-project/vllm-" +"ascend/commit/6666e5265d40ecafc3cb377233fee840d7fe553b)" + +#: ../../source/community/contributors.md +msgid "138" +msgstr 
"138" + +#: ../../source/community/contributors.md +msgid "[@machenglong2025](https://github.com/machenglong2025)" +msgstr "[@machenglong2025](https://github.com/machenglong2025)" + +#: ../../source/community/contributors.md +msgid "" +"[1a82b16](https://github.com/vllm-project/vllm-" +"ascend/commit/1a82b16355d2ec0ba01c23935092dc0af323b820)" +msgstr "" +"[1a82b16](https://github.com/vllm-project/vllm-" +"ascend/commit/1a82b16355d2ec0ba01c23935092dc0af323b820)" + +#: ../../source/community/contributors.md +msgid "137" +msgstr "137" + +#: ../../source/community/contributors.md +msgid "[@1092626063](https://github.com/1092626063)" +msgstr "[@1092626063](https://github.com/1092626063)" + +#: ../../source/community/contributors.md +msgid "2025/09/05" +msgstr "2025/09/05" + +#: ../../source/community/contributors.md +msgid "" +"[5b3646a](https://github.com/vllm-project/vllm-" +"ascend/commit/5b3646ab2142131579661ce12e4f0e4ba731ad06)" +msgstr "" +"[5b3646a](https://github.com/vllm-project/vllm-" +"ascend/commit/5b3646ab2142131579661ce12e4f0e4ba731ad06)" + +#: ../../source/community/contributors.md +msgid "136" +msgstr "136" + +#: ../../source/community/contributors.md +msgid "[@WithHades](https://github.com/WithHades)" +msgstr "[@WithHades](https://github.com/WithHades)" + +#: ../../source/community/contributors.md +msgid "2025/09/04" +msgstr "2025/09/04" + +#: ../../source/community/contributors.md +msgid "" +"[0c0789b](https://github.com/vllm-project/vllm-" +"ascend/commit/0c0789be7442122eb1203abbf89a9592648922e0)" +msgstr "" +"[0c0789b](https://github.com/vllm-project/vllm-" +"ascend/commit/0c0789be7442122eb1203abbf89a9592648922e0)" + +#: ../../source/community/contributors.md +msgid "135" +msgstr "135" + +#: ../../source/community/contributors.md +msgid "[@vllm-ascend-ci](https://github.com/vllm-ascend-ci)" +msgstr "[@vllm-ascend-ci](https://github.com/vllm-ascend-ci)" + +#: ../../source/community/contributors.md +msgid "" +"[3a2a7d8](https://github.com/vllm-project/vllm-" +"ascend/commit/3a2a7d88dbaf4793fb8d9405040bb28348df28e9)" +msgstr "" +"[3a2a7d8](https://github.com/vllm-project/vllm-" +"ascend/commit/3a2a7d88dbaf4793fb8d9405040bb28348df28e9)" + +#: ../../source/community/contributors.md +msgid "134" +msgstr "134" + +#: ../../source/community/contributors.md +msgid "[@baxingpiaochong](https://github.com/baxingpiaochong)" +msgstr "[@baxingpiaochong](https://github.com/baxingpiaochong)" + +#: ../../source/community/contributors.md +msgid "" +"[df88a2e](https://github.com/vllm-project/vllm-" +"ascend/commit/df88a2ecc8116a42d79a13fa1a8a05a03c70324f)" +msgstr "" +"[df88a2e](https://github.com/vllm-project/vllm-" +"ascend/commit/df88a2ecc8116a42d79a13fa1a8a05a03c70324f)" + +#: ../../source/community/contributors.md +msgid "133" +msgstr "133" + +#: ../../source/community/contributors.md +msgid "[@zzy-ContiLearn](https://github.com/zzy-ContiLearn)" +msgstr "[@zzy-ContiLearn](https://github.com/zzy-ContiLearn)" + +#: ../../source/community/contributors.md +msgid "2025/09/03" +msgstr "2025/09/03" + +#: ../../source/community/contributors.md +msgid "" +"[07d44ad](https://github.com/vllm-project/vllm-" +"ascend/commit/07d44ade194b018ae2cc172482d55cb746c5fd0e)" +msgstr "" +"[07d44ad](https://github.com/vllm-project/vllm-" +"ascend/commit/07d44ade194b018ae2cc172482d55cb746c5fd0e)" + +#: ../../source/community/contributors.md +msgid "132" +msgstr "132" + +#: ../../source/community/contributors.md +msgid "[@panchao-hub](https://github.com/panchao-hub)" +msgstr 
"[@panchao-hub](https://github.com/panchao-hub)" + +#: ../../source/community/contributors.md +msgid "2025/08/30" +msgstr "2025/08/30" + +#: ../../source/community/contributors.md +msgid "" +"[7215454](https://github.com/vllm-project/vllm-" +"ascend/commit/7215454de6df78f4f9a49a99c5739f8bb360f5bc)" +msgstr "" +"[7215454](https://github.com/vllm-project/vllm-" +"ascend/commit/7215454de6df78f4f9a49a99c5739f8bb360f5bc)" + +#: ../../source/community/contributors.md +msgid "131" +msgstr "131" + +#: ../../source/community/contributors.md +msgid "[@wangxiaoteng888](https://github.com/wangxiaoteng888)" +msgstr "[@wangxiaoteng888](https://github.com/wangxiaoteng888)" + +#: ../../source/community/contributors.md +msgid "2025/08/29" +msgstr "2025/08/29" + +#: ../../source/community/contributors.md +msgid "" +"[ee6d141](https://github.com/vllm-project/vllm-" +"ascend/commit/ee6d141dd4aa7f6bcef1b52d49614bd74705822f)" +msgstr "" +"[ee6d141](https://github.com/vllm-project/vllm-" +"ascend/commit/ee6d141dd4aa7f6bcef1b52d49614bd74705822f)" + +#: ../../source/community/contributors.md +msgid "130" +msgstr "130" + +#: ../../source/community/contributors.md +msgid "[@lidenghui1110](https://github.com/lidenghui1110)" +msgstr "[@lidenghui1110](https://github.com/lidenghui1110)" + +#: ../../source/community/contributors.md +msgid "" +"[600b08f](https://github.com/vllm-project/vllm-" +"ascend/commit/600b08f7542be3409c2c70927c91471e8de33d03)" +msgstr "" +"[600b08f](https://github.com/vllm-project/vllm-" +"ascend/commit/600b08f7542be3409c2c70927c91471e8de33d03)" + +#: ../../source/community/contributors.md +msgid "129" +msgstr "129" + +#: ../../source/community/contributors.md +msgid "[@NSDie](https://github.com/NSDie)" +msgstr "[@NSDie](https://github.com/NSDie)" + +#: ../../source/community/contributors.md +msgid "2025/08/28" +msgstr "2025/08/28" + +#: ../../source/community/contributors.md +msgid "" +"[1191a64](https://github.com/vllm-project/vllm-" +"ascend/commit/1191a64ae508183d5613711bc98a90250963f83a)" +msgstr "" +"[1191a64](https://github.com/vllm-project/vllm-" +"ascend/commit/1191a64ae508183d5613711bc98a90250963f83a)" + +#: ../../source/community/contributors.md +msgid "128" +msgstr "128" + +#: ../../source/community/contributors.md +msgid "[@s-jiayang](https://github.com/s-jiayang)" +msgstr "[@s-jiayang](https://github.com/s-jiayang)" + +#: ../../source/community/contributors.md +msgid "2025/08/27" +msgstr "2025/08/27" + +#: ../../source/community/contributors.md +msgid "" +"[6a4ec18](https://github.com/vllm-project/vllm-" +"ascend/commit/6a4ec186e731b9516235f4fd30b5b98227513fe7)" +msgstr "" +"[6a4ec18](https://github.com/vllm-project/vllm-" +"ascend/commit/6a4ec186e731b9516235f4fd30b5b98227513fe7)" + +#: ../../source/community/contributors.md +msgid "127" +msgstr "127" + +#: ../../source/community/contributors.md +msgid "[@ZhaoJiangJiang](https://github.com/ZhaoJiangJiang)" +msgstr "[@ZhaoJiangJiang](https://github.com/ZhaoJiangJiang)" + +#: ../../source/community/contributors.md +msgid "2025/08/22" +msgstr "2025/08/22" + +#: ../../source/community/contributors.md +msgid "" +"[3629bc4](https://github.com/vllm-project/vllm-" +"ascend/commit/3629bc4431d3edb4224761f9036b3bddb16158d6)" +msgstr "" +"[3629bc4](https://github.com/vllm-project/vllm-" +"ascend/commit/3629bc4431d3edb4224761f9036b3bddb16158d6)" + +#: ../../source/community/contributors.md +msgid "126" +msgstr "126" + +#: ../../source/community/contributors.md +msgid "[@LookAround0301](https://github.com/LookAround0301)" +msgstr 
"[@LookAround0301](https://github.com/LookAround0301)" + +#: ../../source/community/contributors.md +msgid "" +"[e9fb895](https://github.com/vllm-project/vllm-" +"ascend/commit/e9fb895b10cef37ea634f4d4af71686b09ca9f20)" +msgstr "" +"[e9fb895](https://github.com/vllm-project/vllm-" +"ascend/commit/e9fb895b10cef37ea634f4d4af71686b09ca9f20)" + +#: ../../source/community/contributors.md +msgid "125" +msgstr "125" + +#: ../../source/community/contributors.md +msgid "[@NicholasTao](https://github.com/NicholasTao)" +msgstr "[@NicholasTao](https://github.com/NicholasTao)" + +#: ../../source/community/contributors.md +msgid "2025/08/20" +msgstr "2025/08/20" + +#: ../../source/community/contributors.md +msgid "" +"[7bec1a9](https://github.com/vllm-project/vllm-" +"ascend/commit/7bec1a9b9c372785551d45682bf11063ec42b216)" +msgstr "" +"[7bec1a9](https://github.com/vllm-project/vllm-" +"ascend/commit/7bec1a9b9c372785551d45682bf11063ec42b216)" + +#: ../../source/community/contributors.md +msgid "124" +msgstr "124" + +#: ../../source/community/contributors.md +msgid "[@liuchenbing](https://github.com/liuchenbing)" +msgstr "[@liuchenbing](https://github.com/liuchenbing)" + +#: ../../source/community/contributors.md +msgid "2025/08/19" +msgstr "2025/08/19" + +#: ../../source/community/contributors.md +msgid "" +"[3648d18](https://github.com/vllm-project/vllm-" +"ascend/commit/3648d18e673f15a33a82d6ea95d3a9dd891ff1f5)" +msgstr "" +"[3648d18](https://github.com/vllm-project/vllm-" +"ascend/commit/3648d18e673f15a33a82d6ea95d3a9dd891ff1f5)" + +#: ../../source/community/contributors.md +msgid "123" +msgstr "123" + +#: ../../source/community/contributors.md +msgid "[@gameofdimension](https://github.com/gameofdimension)" +msgstr "[@gameofdimension](https://github.com/gameofdimension)" + +#: ../../source/community/contributors.md +msgid "" +"[27d038d](https://github.com/vllm-project/vllm-" +"ascend/commit/27d038dc663bf550a35a8f15659493b2abefda07)" +msgstr "" +"[27d038d](https://github.com/vllm-project/vllm-" +"ascend/commit/27d038dc663bf550a35a8f15659493b2abefda07)" + +#: ../../source/community/contributors.md +msgid "122" +msgstr "122" + +#: ../../source/community/contributors.md +msgid "2025/08/18" +msgstr "2025/08/18" + +#: ../../source/community/contributors.md +msgid "" +"[03ca2b2](https://github.com/vllm-project/vllm-" +"ascend/commit/03ca2b26ca9ab6b9a12f021b0595a726ee35e223)" +msgstr "" +"[03ca2b2](https://github.com/vllm-project/vllm-" +"ascend/commit/03ca2b26ca9ab6b9a12f021b0595a726ee35e223)" + +#: ../../source/community/contributors.md +msgid "121" +msgstr "121" + +#: ../../source/community/contributors.md +msgid "[@hust17yixuan](https://github.com/hust17yixuan)" +msgstr "[@hust17yixuan](https://github.com/hust17yixuan)" + +#: ../../source/community/contributors.md +msgid "2025/08/14" +msgstr "2025/08/14" + +#: ../../source/community/contributors.md +msgid "" +"[17c2884](https://github.com/vllm-project/vllm-" +"ascend/commit/17c2884f43011724def1bfe8ac68f9952e90b677)" +msgstr "" +"[17c2884](https://github.com/vllm-project/vllm-" +"ascend/commit/17c2884f43011724def1bfe8ac68f9952e90b677)" + +#: ../../source/community/contributors.md +msgid "120" +msgstr "120" + +#: ../../source/community/contributors.md +msgid "[@QwertyJack](https://github.com/QwertyJack)" +msgstr "[@QwertyJack](https://github.com/QwertyJack)" + +#: ../../source/community/contributors.md +msgid "2025/08/11" +msgstr "2025/08/11" + +#: ../../source/community/contributors.md +msgid "" +"[9c6d108](https://github.com/vllm-project/vllm-" 
+"ascend/commit/9c6d108330574176f79eea52f989ea6049336af8)" +msgstr "" +"[9c6d108](https://github.com/vllm-project/vllm-" +"ascend/commit/9c6d108330574176f79eea52f989ea6049336af8)" + +#: ../../source/community/contributors.md +msgid "119" +msgstr "119" + +#: ../../source/community/contributors.md +msgid "[@haojiangzheng](https://github.com/haojiangzheng)" +msgstr "[@haojiangzheng](https://github.com/haojiangzheng)" + +#: ../../source/community/contributors.md +msgid "" +"[eb43a47](https://github.com/vllm-project/vllm-" +"ascend/commit/eb43a475f429192e7509e85e28b1c65d5097f373)" +msgstr "" +"[eb43a47](https://github.com/vllm-project/vllm-" +"ascend/commit/eb43a475f429192e7509e85e28b1c65d5097f373)" + +#: ../../source/community/contributors.md +msgid "118" +msgstr "118" + +#: ../../source/community/contributors.md +msgid "[@yangqinghao-cmss](https://github.com/yangqinghao-cmss)" +msgstr "[@yangqinghao-cmss](https://github.com/yangqinghao-cmss)" + +#: ../../source/community/contributors.md +msgid "2025/08/01" +msgstr "2025/08/01" + +#: ../../source/community/contributors.md +msgid "" +"[99fa0ac](https://github.com/vllm-project/vllm-" +"ascend/commit/99fa0ac882c79ae9282940125b042a44ea422757)" +msgstr "" +"[99fa0ac](https://github.com/vllm-project/vllm-" +"ascend/commit/99fa0ac882c79ae9282940125b042a44ea422757)" + +#: ../../source/community/contributors.md +msgid "117" +msgstr "117" + +#: ../../source/community/contributors.md +msgid "[@Liccol](https://github.com/Liccol)" +msgstr "[@Liccol](https://github.com/Liccol)" + +#: ../../source/community/contributors.md +msgid "2025/07/31" +msgstr "2025/07/31" + +#: ../../source/community/contributors.md +msgid "" +"[7c90ba5](https://github.com/vllm-project/vllm-" +"ascend/commit/7c90ba5fe8e420b891fdd30df050a33e3767835d)" +msgstr "" +"[7c90ba5](https://github.com/vllm-project/vllm-" +"ascend/commit/7c90ba5fe8e420b891fdd30df050a33e3767835d)" + +#: ../../source/community/contributors.md +msgid "116" +msgstr "116" + +#: ../../source/community/contributors.md +msgid "[@1024daniel](https://github.com/1024daniel)" +msgstr "[@1024daniel](https://github.com/1024daniel)" + +#: ../../source/community/contributors.md +msgid "" +"[db310c6](https://github.com/vllm-project/vllm-" +"ascend/commit/db310c6ec97b056296f7c2348b90c1d96d0b562a)" +msgstr "" +"[db310c6](https://github.com/vllm-project/vllm-" +"ascend/commit/db310c6ec97b056296f7c2348b90c1d96d0b562a)" + +#: ../../source/community/contributors.md +msgid "115" +msgstr "115" + +#: ../../source/community/contributors.md +msgid "[@YuanCheng-coder](https://github.com/YuanCheng-coder)" +msgstr "[@YuanCheng-coder](https://github.com/YuanCheng-coder)" + +#: ../../source/community/contributors.md +msgid "2025/07/30" +msgstr "2025/07/30" + +#: ../../source/community/contributors.md +msgid "" +"[34dd24a](https://github.com/vllm-project/vllm-" +"ascend/commit/34dd24adf21fb85a2c413292754b1599832efae2)" +msgstr "" +"[34dd24a](https://github.com/vllm-project/vllm-" +"ascend/commit/34dd24adf21fb85a2c413292754b1599832efae2)" + +#: ../../source/community/contributors.md +msgid "114" +msgstr "114" + +#: ../../source/community/contributors.md +msgid "[@hongfugui](https://github.com/hongfugui)" +msgstr "[@hongfugui](https://github.com/hongfugui)" + +#: ../../source/community/contributors.md +msgid "" +"[1dbb888](https://github.com/vllm-project/vllm-" +"ascend/commit/1dbb8882759e4326f5706f6e610674423376c2f3)" +msgstr "" +"[1dbb888](https://github.com/vllm-project/vllm-" +"ascend/commit/1dbb8882759e4326f5706f6e610674423376c2f3)" + +#: 
../../source/community/contributors.md +msgid "113" +msgstr "113" + +#: ../../source/community/contributors.md +msgid "[@taoxudonghaha](https://github.com/taoxudonghaha)" +msgstr "[@taoxudonghaha](https://github.com/taoxudonghaha)" + +#: ../../source/community/contributors.md +msgid "2025/07/29" +msgstr "2025/07/29" + +#: ../../source/community/contributors.md +msgid "" +"[540336e](https://github.com/vllm-project/vllm-" +"ascend/commit/540336edc9db09072a9aaa486fbf7ce625da5b9e)" +msgstr "" +"[540336e](https://github.com/vllm-project/vllm-" +"ascend/commit/540336edc9db09072a9aaa486fbf7ce625da5b9e)" + +#: ../../source/community/contributors.md +msgid "112" +msgstr "112" + +#: ../../source/community/contributors.md +msgid "[@wanghanqingLYT](https://github.com/wanghanqingLYT)" +msgstr "[@wanghanqingLYT](https://github.com/wanghanqingLYT)" + +#: ../../source/community/contributors.md +msgid "2025/07/26" +msgstr "2025/07/26" + +#: ../../source/community/contributors.md +msgid "" +"[833d33c](https://github.com/vllm-project/vllm-" +"ascend/commit/833d33cf09117a32e17cf4057b29709011f6e815)" +msgstr "" +"[833d33c](https://github.com/vllm-project/vllm-" +"ascend/commit/833d33cf09117a32e17cf4057b29709011f6e815)" + +#: ../../source/community/contributors.md +msgid "111" +msgstr "111" + +#: ../../source/community/contributors.md +msgid "[@pjgao](https://github.com/pjgao)" +msgstr "[@pjgao](https://github.com/pjgao)" + +#: ../../source/community/contributors.md +msgid "" +"[149bed1](https://github.com/vllm-project/vllm-" +"ascend/commit/149bed15d6da19cab194dc842fc12419cbef088b)" +msgstr "" +"[149bed1](https://github.com/vllm-project/vllm-" +"ascend/commit/149bed15d6da19cab194dc842fc12419cbef088b)" + +#: ../../source/community/contributors.md +msgid "110" +msgstr "110" + +#: ../../source/community/contributors.md +msgid "[@lbk-sys](https://github.com/lbk-sys)" +msgstr "[@lbk-sys](https://github.com/lbk-sys)" + +#: ../../source/community/contributors.md +msgid "" +"[3c7c284](https://github.com/vllm-project/vllm-" +"ascend/commit/3c7c284b980eb224c6ab231061c7e26b24534922)" +msgstr "" +"[3c7c284](https://github.com/vllm-project/vllm-" +"ascend/commit/3c7c284b980eb224c6ab231061c7e26b24534922)" + +#: ../../source/community/contributors.md +msgid "109" +msgstr "109" + +#: ../../source/community/contributors.md +msgid "[@CaveNightingale](https://github.com/CaveNightingale)" +msgstr "[@CaveNightingale](https://github.com/CaveNightingale)" + +#: ../../source/community/contributors.md +msgid "" +"[fbb9326](https://github.com/vllm-project/vllm-" +"ascend/commit/fbb9326f8f1e0d69c280655006eebf589e9c8ad2)" +msgstr "" +"[fbb9326](https://github.com/vllm-project/vllm-" +"ascend/commit/fbb9326f8f1e0d69c280655006eebf589e9c8ad2)" + +#: ../../source/community/contributors.md +msgid "108" +msgstr "108" + +#: ../../source/community/contributors.md +msgid "[@SlightwindSec](https://github.com/SlightwindSec)" +msgstr "[@SlightwindSec](https://github.com/SlightwindSec)" + +#: ../../source/community/contributors.md +msgid "2025/07/25" +msgstr "2025/07/25" + +#: ../../source/community/contributors.md +msgid "" +"[515237a](https://github.com/vllm-project/vllm-" +"ascend/commit/515237ac6ec7b9b65b81602d9e017b9120a889ae)" +msgstr "" +"[515237a](https://github.com/vllm-project/vllm-" +"ascend/commit/515237ac6ec7b9b65b81602d9e017b9120a889ae)" + +#: ../../source/community/contributors.md +msgid "107" +msgstr "107" + +#: ../../source/community/contributors.md +msgid "[@Ronald1995](https://github.com/Ronald1995)" +msgstr 
"[@Ronald1995](https://github.com/Ronald1995)" + +#: ../../source/community/contributors.md +msgid "" +"[e561a2c](https://github.com/vllm-project/vllm-" +"ascend/commit/e561a2c6ec4493b490b13a4a9007d8f451ae0d0f)" +msgstr "" +"[e561a2c](https://github.com/vllm-project/vllm-" +"ascend/commit/e561a2c6ec4493b490b13a4a9007d8f451ae0d0f)" + +#: ../../source/community/contributors.md +msgid "106" +msgstr "106" + +#: ../../source/community/contributors.md +msgid "[@ZrBac](https://github.com/ZrBac)" +msgstr "[@ZrBac](https://github.com/ZrBac)" + +#: ../../source/community/contributors.md +msgid "2025/07/24" +msgstr "2025/07/24" + +#: ../../source/community/contributors.md +msgid "" +"[2ffe051](https://github.com/vllm-project/vllm-" +"ascend/commit/2ffe051859d585df8353d1b9eefb64c44078175a)" +msgstr "" +"[2ffe051](https://github.com/vllm-project/vllm-" +"ascend/commit/2ffe051859d585df8353d1b9eefb64c44078175a)" + +#: ../../source/community/contributors.md +msgid "105" +msgstr "105" + +#: ../../source/community/contributors.md +msgid "[@SunnyLee151064](https://github.com/SunnyLee151064)" +msgstr "[@SunnyLee151064](https://github.com/SunnyLee151064)" + +#: ../../source/community/contributors.md +msgid "" +"[34571ea](https://github.com/vllm-project/vllm-" +"ascend/commit/34571ea5ae69529758edf75f0252f86ccb4c7184)" +msgstr "" +"[34571ea](https://github.com/vllm-project/vllm-" +"ascend/commit/34571ea5ae69529758edf75f0252f86ccb4c7184)" + +#: ../../source/community/contributors.md +msgid "104" +msgstr "104" + +#: ../../source/community/contributors.md +msgid "[@raindaywhu](https://github.com/raindaywhu)" +msgstr "[@raindaywhu](https://github.com/raindaywhu)" + +#: ../../source/community/contributors.md +msgid "" +"[b235fb7](https://github.com/vllm-project/vllm-" +"ascend/commit/b235fb738df680894604b6a3b68b68ed43f4de9a)" +msgstr "" +"[b235fb7](https://github.com/vllm-project/vllm-" +"ascend/commit/b235fb738df680894604b6a3b68b68ed43f4de9a)" + +#: ../../source/community/contributors.md +msgid "103" +msgstr "103" + +#: ../../source/community/contributors.md +msgid "[@lilinsiman](https://github.com/lilinsiman)" +msgstr "[@lilinsiman](https://github.com/lilinsiman)" + +#: ../../source/community/contributors.md +msgid "" +"[718c544](https://github.com/vllm-project/vllm-" +"ascend/commit/718c544e11dc71c8d0ddde6159affcdeeac109b5)" +msgstr "" +"[718c544](https://github.com/vllm-project/vllm-" +"ascend/commit/718c544e11dc71c8d0ddde6159affcdeeac109b5)" + +#: ../../source/community/contributors.md +msgid "102" +msgstr "102" + +#: ../../source/community/contributors.md +msgid "[@shiyuan680](https://github.com/shiyuan680)" +msgstr "[@shiyuan680](https://github.com/shiyuan680)" + +#: ../../source/community/contributors.md +msgid "2025/07/23" +msgstr "2025/07/23" + +#: ../../source/community/contributors.md +msgid "" +"[ac0bf13](https://github.com/vllm-project/vllm-" +"ascend/commit/ac0bf133f47ead20f18bf71f9be6dbe05fbd218f)" +msgstr "" +"[ac0bf13](https://github.com/vllm-project/vllm-" +"ascend/commit/ac0bf133f47ead20f18bf71f9be6dbe05fbd218f)" + +#: ../../source/community/contributors.md +msgid "101" +msgstr "101" + +#: ../../source/community/contributors.md +msgid "[@shaopeng-666](https://github.com/shaopeng-666)" +msgstr "[@shaopeng-666](https://github.com/shaopeng-666)" + +#: ../../source/community/contributors.md +msgid "" +"[54b1753](https://github.com/vllm-project/vllm-" +"ascend/commit/54b1753cf56945e88c9d2449a7d6b6adf8638ead)" +msgstr "" +"[54b1753](https://github.com/vllm-project/vllm-" 
+"ascend/commit/54b1753cf56945e88c9d2449a7d6b6adf8638ead)" + +#: ../../source/community/contributors.md +msgid "100" +msgstr "100" + +#: ../../source/community/contributors.md +msgid "[@loukong33](https://github.com/loukong33)" +msgstr "[@loukong33](https://github.com/loukong33)" + +#: ../../source/community/contributors.md +msgid "2025/07/22" +msgstr "2025/07/22" + +#: ../../source/community/contributors.md +msgid "" +"[b73c701](https://github.com/vllm-project/vllm-" +"ascend/commit/b73c701a6038d14a65db099e0d2da9f61f239044)" +msgstr "" +"[b73c701](https://github.com/vllm-project/vllm-" +"ascend/commit/b73c701a6038d14a65db099e0d2da9f61f239044)" + +#: ../../source/community/contributors.md +msgid "99" +msgstr "99" + +#: ../../source/community/contributors.md +msgid "[@aidoczh](https://github.com/aidoczh)" +msgstr "[@aidoczh](https://github.com/aidoczh)" + +#: ../../source/community/contributors.md +msgid "2025/07/21" +msgstr "2025/07/21" + +#: ../../source/community/contributors.md +msgid "" +"[c32eea9](https://github.com/vllm-project/vllm-" +"ascend/commit/c32eea96b73d26268070f57ef98416decc98aff7)" +msgstr "" +"[c32eea9](https://github.com/vllm-project/vllm-" +"ascend/commit/c32eea96b73d26268070f57ef98416decc98aff7)" + +#: ../../source/community/contributors.md +msgid "98" +msgstr "98" + +#: ../../source/community/contributors.md +msgid "[@nuclearwu](https://github.com/nuclearwu)" +msgstr "[@nuclearwu](https://github.com/nuclearwu)" + +#: ../../source/community/contributors.md +msgid "2025/07/20" +msgstr "2025/07/20" + +#: ../../source/community/contributors.md +msgid "" +"[54f2b31](https://github.com/vllm-project/vllm-" +"ascend/commit/54f2b311848badc86371d269140e729012a60f2c)" +msgstr "" +"[54f2b31](https://github.com/vllm-project/vllm-" +"ascend/commit/54f2b311848badc86371d269140e729012a60f2c)" + +#: ../../source/community/contributors.md +msgid "97" +msgstr "97" + +#: ../../source/community/contributors.md +msgid "[@pkking](https://github.com/pkking)" +msgstr "[@pkking](https://github.com/pkking)" + +#: ../../source/community/contributors.md +msgid "2025/07/18" +msgstr "2025/07/18" + +#: ../../source/community/contributors.md +msgid "" +"[3e39d72](https://github.com/vllm-project/vllm-" +"ascend/commit/3e39d7234c0e5c66b184c136c602e87272b5a36e)" +msgstr "" +"[3e39d72](https://github.com/vllm-project/vllm-" +"ascend/commit/3e39d7234c0e5c66b184c136c602e87272b5a36e)" + +#: ../../source/community/contributors.md +msgid "96" +msgstr "96" + +#: ../../source/community/contributors.md +msgid "[@lianyiibo](https://github.com/lianyiibo)" +msgstr "[@lianyiibo](https://github.com/lianyiibo)" + +#: ../../source/community/contributors.md +msgid "" +"[53d2ea3](https://github.com/vllm-project/vllm-" +"ascend/commit/53d2ea3789ffce32bf3ceb055d5582d28eadc6c7)" +msgstr "" +"[53d2ea3](https://github.com/vllm-project/vllm-" +"ascend/commit/53d2ea3789ffce32bf3ceb055d5582d28eadc6c7)" + +#: ../../source/community/contributors.md +msgid "95" +msgstr "95" + +#: ../../source/community/contributors.md +msgid "[@zheliuyu](https://github.com/zheliuyu)" +msgstr "[@zheliuyu](https://github.com/zheliuyu)" + +#: ../../source/community/contributors.md +msgid "2025/07/15" +msgstr "2025/07/15" + +#: ../../source/community/contributors.md +msgid "" +"[a2a6377](https://github.com/vllm-project/vllm-" +"ascend/commit/a2a6377d45376b4f09bf6141f27284ca013ac075)" +msgstr "" +"[a2a6377](https://github.com/vllm-project/vllm-" +"ascend/commit/a2a6377d45376b4f09bf6141f27284ca013ac075)" + +#: ../../source/community/contributors.md +msgid 
"94" +msgstr "94" + +#: ../../source/community/contributors.md +msgid "[@NNUCJ](https://github.com/NNUCJ)" +msgstr "[@NNUCJ](https://github.com/NNUCJ)" + +#: ../../source/community/contributors.md +msgid "2025/07/10" +msgstr "2025/07/10" + +#: ../../source/community/contributors.md +msgid "" +"[3b99491](https://github.com/vllm-project/vllm-" +"ascend/commit/3b994919876a40030eb1045e01dd7bc3c7bac6a6)" +msgstr "" +"[3b99491](https://github.com/vllm-project/vllm-" +"ascend/commit/3b994919876a40030eb1045e01dd7bc3c7bac6a6)" + +#: ../../source/community/contributors.md +msgid "93" +msgstr "93" + +#: ../../source/community/contributors.md +msgid "[@ZhengWG](https://github.com/ZhengWG)" +msgstr "[@ZhengWG](https://github.com/ZhengWG)" + +#: ../../source/community/contributors.md +msgid "2025/07/07" +msgstr "2025/07/07" + +#: ../../source/community/contributors.md +msgid "" +"[9c886d0](https://github.com/vllm-project/vllm-" +"ascend/commit/9c886d0a1f0fc011692090b0395d734c83a469de)" +msgstr "" +"[9c886d0](https://github.com/vllm-project/vllm-" +"ascend/commit/9c886d0a1f0fc011692090b0395d734c83a469de)" + +#: ../../source/community/contributors.md +msgid "92" +msgstr "92" + +#: ../../source/community/contributors.md +msgid "[@wm901115nwpu](https://github.com/wm901115nwpu)" +msgstr "[@wm901115nwpu](https://github.com/wm901115nwpu)" + +#: ../../source/community/contributors.md +msgid "" +"[f08c4f1](https://github.com/vllm-project/vllm-" +"ascend/commit/f08c4f15a27f0f27132f4ca7a0c226bf0a2a47d4)" +msgstr "" +"[f08c4f1](https://github.com/vllm-project/vllm-" +"ascend/commit/f08c4f15a27f0f27132f4ca7a0c226bf0a2a47d4)" + +#: ../../source/community/contributors.md +msgid "91" +msgstr "91" + +#: ../../source/community/contributors.md +msgid "[@JC-ut0](https://github.com/JC-ut0)" +msgstr "[@JC-ut0](https://github.com/JC-ut0)" + +#: ../../source/community/contributors.md +msgid "2025/07/04" +msgstr "2025/07/04" + +#: ../../source/community/contributors.md +msgid "" +"[e1d282d](https://github.com/vllm-project/vllm-" +"ascend/commit/e1d282d7cc017f7e8075074a6981532045801a73)" +msgstr "" +"[e1d282d](https://github.com/vllm-project/vllm-" +"ascend/commit/e1d282d7cc017f7e8075074a6981532045801a73)" + +#: ../../source/community/contributors.md +msgid "90" +msgstr "90" + +#: ../../source/community/contributors.md +msgid "[@Irving11-BKN](https://github.com/Irving11-BKN)" +msgstr "[@Irving11-BKN](https://github.com/Irving11-BKN)" + +#: ../../source/community/contributors.md +msgid "2025/07/03" +msgstr "2025/07/03" + +#: ../../source/community/contributors.md +msgid "" +"[3ea2410](https://github.com/vllm-project/vllm-" +"ascend/commit/3ea24101416675663d2c3c2874054b9dd3451bd0)" +msgstr "" +"[3ea2410](https://github.com/vllm-project/vllm-" +"ascend/commit/3ea24101416675663d2c3c2874054b9dd3451bd0)" + +#: ../../source/community/contributors.md +msgid "89" +msgstr "89" + +#: ../../source/community/contributors.md +msgid "[@zhanghw0354](https://github.com/zhanghw0354)" +msgstr "[@zhanghw0354](https://github.com/zhanghw0354)" + +#: ../../source/community/contributors.md +msgid "2025/07/02" +msgstr "2025/07/02" + +#: ../../source/community/contributors.md +msgid "" +"[9fb3d55](https://github.com/vllm-project/vllm-" +"ascend/commit/9fb3d558e5b57a3c97ee5e11b9f5dba6ad3df9a5)" +msgstr "" +"[9fb3d55](https://github.com/vllm-project/vllm-" +"ascend/commit/9fb3d558e5b57a3c97ee5e11b9f5dba6ad3df9a5)" + +#: ../../source/community/contributors.md +msgid "88" +msgstr "88" + +#: ../../source/community/contributors.md +msgid 
"[@xudongLi-cmss](https://github.com/xudongLi-cmss)" +msgstr "[@xudongLi-cmss](https://github.com/xudongLi-cmss)" + +#: ../../source/community/contributors.md +msgid "" +"[7fc1a98](https://github.com/vllm-project/vllm-" +"ascend/commit/7fc1a984890bd930f670deedcb2dda3a46f84576)" +msgstr "" +"[7fc1a98](https://github.com/vllm-project/vllm-" +"ascend/commit/7fc1a984890bd930f670deedcb2dda3a46f84576)" + +#: ../../source/community/contributors.md +msgid "87" +msgstr "87" + +#: ../../source/community/contributors.md +msgid "[@GDzhu01](https://github.com/GDzhu01)" +msgstr "[@GDzhu01](https://github.com/GDzhu01)" + +#: ../../source/community/contributors.md +msgid "2025/06/28" +msgstr "2025/06/28" + +#: ../../source/community/contributors.md +msgid "" +"[b308a7a](https://github.com/vllm-project/vllm-" +"ascend/commit/b308a7a25897b88d4a23a9e3d583f4ec6de256ac)" +msgstr "" +"[b308a7a](https://github.com/vllm-project/vllm-" +"ascend/commit/b308a7a25897b88d4a23a9e3d583f4ec6de256ac)" + +#: ../../source/community/contributors.md +msgid "86" +msgstr "86" + +#: ../../source/community/contributors.md +msgid "[@zeshengzong](https://github.com/zeshengzong)" +msgstr "[@zeshengzong](https://github.com/zeshengzong)" + +#: ../../source/community/contributors.md +msgid "2025/06/26" +msgstr "2025/06/26" + +#: ../../source/community/contributors.md +msgid "" +"[192dbbc](https://github.com/vllm-project/vllm-" +"ascend/commit/192dbbcc6e244a8471d3c00033dc637233ee25aa)" +msgstr "" +"[192dbbc](https://github.com/vllm-project/vllm-" +"ascend/commit/192dbbcc6e244a8471d3c00033dc637233ee25aa)" + +#: ../../source/community/contributors.md +msgid "85" +msgstr "85" + +#: ../../source/community/contributors.md +msgid "[@leo-pony](https://github.com/leo-pony)" +msgstr "[@leo-pony](https://github.com/leo-pony)" + +#: ../../source/community/contributors.md +msgid "" +"[1025344](https://github.com/vllm-project/vllm-" +"ascend/commit/10253449120307e3b45f99d82218ba53e3f2a5f2)" +msgstr "" +"[1025344](https://github.com/vllm-project/vllm-" +"ascend/commit/10253449120307e3b45f99d82218ba53e3f2a5f2)" + +#: ../../source/community/contributors.md +msgid "84" +msgstr "84" + +#: ../../source/community/contributors.md +msgid "[@sharonyunyun](https://github.com/sharonyunyun)" +msgstr "[@sharonyunyun](https://github.com/sharonyunyun)" + +#: ../../source/community/contributors.md +msgid "2025/06/25" +msgstr "2025/06/25" + +#: ../../source/community/contributors.md +msgid "" +"[941269a](https://github.com/vllm-project/vllm-" +"ascend/commit/941269a6c5bbc79f6c1b6abd4680dc5802dd8666)" +msgstr "" +"[941269a](https://github.com/vllm-project/vllm-" +"ascend/commit/941269a6c5bbc79f6c1b6abd4680dc5802dd8666)" + +#: ../../source/community/contributors.md +msgid "83" +msgstr "83" + +#: ../../source/community/contributors.md +msgid "[@Pr0Wh1teGivee](https://github.com/Pr0Wh1teGivee)" +msgstr "[@Pr0Wh1teGivee](https://github.com/Pr0Wh1teGivee)" + +#: ../../source/community/contributors.md +msgid "" +"[2fda604](https://github.com/vllm-project/vllm-" +"ascend/commit/2fda60464c287fe456b4a2f27e63996edc65dd40)" +msgstr "" +"[2fda604](https://github.com/vllm-project/vllm-" +"ascend/commit/2fda60464c287fe456b4a2f27e63996edc65dd40)" + +#: ../../source/community/contributors.md +msgid "82" +msgstr "82" + +#: ../../source/community/contributors.md +msgid "[@liziyu179](https://github.com/liziyu179)" +msgstr "[@liziyu179](https://github.com/liziyu179)" + +#: ../../source/community/contributors.md +msgid "2025/06/24" +msgstr "2025/06/24" + +#: 
../../source/community/contributors.md +msgid "" +"[6ed3f00](https://github.com/vllm-project/vllm-" +"ascend/commit/6ed3f004274b4c89df2676c888b804e103efaf79)" +msgstr "" +"[6ed3f00](https://github.com/vllm-project/vllm-" +"ascend/commit/6ed3f004274b4c89df2676c888b804e103efaf79)" + +#: ../../source/community/contributors.md +msgid "81" +msgstr "81" + +#: ../../source/community/contributors.md +msgid "[@xleoken](https://github.com/xleoken)" +msgstr "[@xleoken](https://github.com/xleoken)" + +#: ../../source/community/contributors.md +msgid "2025/06/23" +msgstr "2025/06/23" + +#: ../../source/community/contributors.md +msgid "" +"[4447e53](https://github.com/vllm-project/vllm-" +"ascend/commit/4447e53d7ad5edcda978ca6b0a3a26a73c604de0)" +msgstr "" +"[4447e53](https://github.com/vllm-project/vllm-" +"ascend/commit/4447e53d7ad5edcda978ca6b0a3a26a73c604de0)" + +#: ../../source/community/contributors.md +msgid "80" +msgstr "80" + +#: ../../source/community/contributors.md +msgid "[@weiguihua2](https://github.com/weiguihua2)" +msgstr "[@weiguihua2](https://github.com/weiguihua2)" + +#: ../../source/community/contributors.md +msgid "" +"[e112317](https://github.com/vllm-project/vllm-" +"ascend/commit/e1123172d12afa15f306ba6e1e4c9d0c6d1d799e)" +msgstr "" +"[e112317](https://github.com/vllm-project/vllm-" +"ascend/commit/e1123172d12afa15f306ba6e1e4c9d0c6d1d799e)" + +#: ../../source/community/contributors.md +msgid "79" +msgstr "79" + +#: ../../source/community/contributors.md +msgid "[@lyj-jjj](https://github.com/lyj-jjj)" +msgstr "[@lyj-jjj](https://github.com/lyj-jjj)" + +#: ../../source/community/contributors.md +msgid "" +"[5177bef](https://github.com/vllm-project/vllm-" +"ascend/commit/5177bef87a21331dcca11159d3d1438075cbd74e)" +msgstr "" +"[5177bef](https://github.com/vllm-project/vllm-" +"ascend/commit/5177bef87a21331dcca11159d3d1438075cbd74e)" + +#: ../../source/community/contributors.md +msgid "78" +msgstr "78" + +#: ../../source/community/contributors.md +msgid "[@zhoux77899](https://github.com/zhoux77899)" +msgstr "[@zhoux77899](https://github.com/zhoux77899)" + +#: ../../source/community/contributors.md +msgid "2025/06/21" +msgstr "2025/06/21" + +#: ../../source/community/contributors.md +msgid "" +"[a927bee](https://github.com/vllm-project/vllm-" +"ascend/commit/a927bee4f098e7b95a255a064c95adea73ee2a50)" +msgstr "" +"[a927bee](https://github.com/vllm-project/vllm-" +"ascend/commit/a927bee4f098e7b95a255a064c95adea73ee2a50)" + +#: ../../source/community/contributors.md +msgid "77" +msgstr "77" + +#: ../../source/community/contributors.md +msgid "[@yuancaoyaoHW](https://github.com/yuancaoyaoHW)" +msgstr "[@yuancaoyaoHW](https://github.com/yuancaoyaoHW)" + +#: ../../source/community/contributors.md +msgid "2025/06/20" +msgstr "2025/06/20" + +#: ../../source/community/contributors.md +msgid "" +"[00ae250](https://github.com/vllm-project/vllm-" +"ascend/commit/00ae250f3ced68317bc91c93dc1f1a0977aa0b94)" +msgstr "" +"[00ae250](https://github.com/vllm-project/vllm-" +"ascend/commit/00ae250f3ced68317bc91c93dc1f1a0977aa0b94)" + +#: ../../source/community/contributors.md +msgid "76" +msgstr "76" + +#: ../../source/community/contributors.md +msgid "[@shikang-hangzhou](https://github.com/shikang-hangzhou)" +msgstr "[@shikang-hangzhou](https://github.com/shikang-hangzhou)" + +#: ../../source/community/contributors.md +msgid "" +"[f1353d5](https://github.com/vllm-project/vllm-" +"ascend/commit/f1353d51b32ddf22444b0608b3d7f60fed048ac4)" +msgstr "" +"[f1353d5](https://github.com/vllm-project/vllm-" 
+"ascend/commit/f1353d51b32ddf22444b0608b3d7f60fed048ac4)" + +#: ../../source/community/contributors.md +msgid "75" +msgstr "75" + +#: ../../source/community/contributors.md +msgid "[@pichangping](https://github.com/pichangping)" +msgstr "[@pichangping](https://github.com/pichangping)" + +#: ../../source/community/contributors.md +msgid "" +"[53ce4a0](https://github.com/vllm-project/vllm-" +"ascend/commit/53ce4a0ad232bf36fd40da059b0cfc02c34669ac)" +msgstr "" +"[53ce4a0](https://github.com/vllm-project/vllm-" +"ascend/commit/53ce4a0ad232bf36fd40da059b0cfc02c34669ac)" + +#: ../../source/community/contributors.md +msgid "74" +msgstr "74" + +#: ../../source/community/contributors.md +msgid "[@farawayboat](https://github.com/farawayboat)" +msgstr "[@farawayboat](https://github.com/farawayboat)" + +#: ../../source/community/contributors.md +msgid "" +"[5ab6fdd](https://github.com/vllm-project/vllm-" +"ascend/commit/5ab6fdd0edf149a14742f3c7a214cf5f9f792441)" +msgstr "" +"[5ab6fdd](https://github.com/vllm-project/vllm-" +"ascend/commit/5ab6fdd0edf149a14742f3c7a214cf5f9f792441)" + +#: ../../source/community/contributors.md +msgid "73" +msgstr "73" + +#: ../../source/community/contributors.md +msgid "[@underfituu](https://github.com/underfituu)" +msgstr "[@underfituu](https://github.com/underfituu)" + +#: ../../source/community/contributors.md +msgid "2025/06/19" +msgstr "2025/06/19" + +#: ../../source/community/contributors.md +msgid "" +"[e2c9ecb](https://github.com/vllm-project/vllm-" +"ascend/commit/e2c9ecb8029731c0f86199e8371d3af7555504a7)" +msgstr "" +"[e2c9ecb](https://github.com/vllm-project/vllm-" +"ascend/commit/e2c9ecb8029731c0f86199e8371d3af7555504a7)" + +#: ../../source/community/contributors.md +msgid "72" +msgstr "72" + +#: ../../source/community/contributors.md +msgid "[@songshanhu07](https://github.com/songshanhu07)" +msgstr "[@songshanhu07](https://github.com/songshanhu07)" + +#: ../../source/community/contributors.md +msgid "2025/06/18" +msgstr "2025/06/18" + +#: ../../source/community/contributors.md +msgid "" +"[ebb2a70](https://github.com/vllm-project/vllm-" +"ascend/commit/ebb2a70dbbdb8f55002de3313e17dfd595e1de1f)" +msgstr "" +"[ebb2a70](https://github.com/vllm-project/vllm-" +"ascend/commit/ebb2a70dbbdb8f55002de3313e17dfd595e1de1f)" + +#: ../../source/community/contributors.md +msgid "71" +msgstr "71" + +#: ../../source/community/contributors.md +msgid "[@zhuo97](https://github.com/zhuo97)" +msgstr "[@zhuo97](https://github.com/zhuo97)" + +#: ../../source/community/contributors.md +msgid "2025/06/16" +msgstr "2025/06/16" + +#: ../../source/community/contributors.md +msgid "" +"[f5404dc](https://github.com/vllm-project/vllm-" +"ascend/commit/f5404dc650882c6f0423db9e87f9b38f756211c5)" +msgstr "" +"[f5404dc](https://github.com/vllm-project/vllm-" +"ascend/commit/f5404dc650882c6f0423db9e87f9b38f756211c5)" + +#: ../../source/community/contributors.md +msgid "70" +msgstr "70" + +#: ../../source/community/contributors.md +msgid "[@henryxuxu0716](https://github.com/henryxuxu0716)" +msgstr "[@henryxuxu0716](https://github.com/henryxuxu0716)" + +#: ../../source/community/contributors.md +msgid "2025/06/14" +msgstr "2025/06/14" + +#: ../../source/community/contributors.md +msgid "" +"[38692b5](https://github.com/vllm-project/vllm-" +"ascend/commit/38692b5c005fa05625e05f8df600d7df712ecd47)" +msgstr "" +"[38692b5](https://github.com/vllm-project/vllm-" +"ascend/commit/38692b5c005fa05625e05f8df600d7df712ecd47)" + +#: ../../source/community/contributors.md +msgid "69" +msgstr "69" + +#: 
../../source/community/contributors.md +msgid "[@fems14](https://github.com/fems14)" +msgstr "[@fems14](https://github.com/fems14)" + +#: ../../source/community/contributors.md +msgid "" +"[ab5d110](https://github.com/vllm-project/vllm-" +"ascend/commit/ab5d110fcc35ca11330977450141b1d7176f21e7)" +msgstr "" +"[ab5d110](https://github.com/vllm-project/vllm-" +"ascend/commit/ab5d110fcc35ca11330977450141b1d7176f21e7)" + +#: ../../source/community/contributors.md +msgid "68" +msgstr "68" + +#: ../../source/community/contributors.md +msgid "[@wangyanhui-cmss](https://github.com/wangyanhui-cmss)" +msgstr "[@wangyanhui-cmss](https://github.com/wangyanhui-cmss)" + +#: ../../source/community/contributors.md +msgid "2025/06/12" +msgstr "2025/06/12" + +#: ../../source/community/contributors.md +msgid "" +"[c6e2a5f](https://github.com/vllm-project/vllm-" +"ascend/commit/c6e2a5fb4014b863cee6abc3009f5bc5340c9e88)" +msgstr "" +"[c6e2a5f](https://github.com/vllm-project/vllm-" +"ascend/commit/c6e2a5fb4014b863cee6abc3009f5bc5340c9e88)" + +#: ../../source/community/contributors.md +msgid "67" +msgstr "67" + +#: ../../source/community/contributors.md +msgid "[@yzim](https://github.com/yzim)" +msgstr "[@yzim](https://github.com/yzim)" + +#: ../../source/community/contributors.md +msgid "2025/06/11" +msgstr "2025/06/11" + +#: ../../source/community/contributors.md +msgid "" +"[4153a50](https://github.com/vllm-project/vllm-" +"ascend/commit/4153a5091b698c2270d160409e7fee73baaf701b)" +msgstr "" +"[4153a50](https://github.com/vllm-project/vllm-" +"ascend/commit/4153a5091b698c2270d160409e7fee73baaf701b)" + +#: ../../source/community/contributors.md +msgid "66" +msgstr "66" + +#: ../../source/community/contributors.md +msgid "[@chenwaner](https://github.com/chenwaner)" +msgstr "[@chenwaner](https://github.com/chenwaner)" + +#: ../../source/community/contributors.md +msgid "" +"[e46dc14](https://github.com/vllm-project/vllm-" +"ascend/commit/e46dc142bf1180453c64226d76854fc1ec696169)" +msgstr "" +"[e46dc14](https://github.com/vllm-project/vllm-" +"ascend/commit/e46dc142bf1180453c64226d76854fc1ec696169)" + +#: ../../source/community/contributors.md +msgid "65" +msgstr "65" + +#: ../../source/community/contributors.md +msgid "[@Yuxiao-Xu](https://github.com/Yuxiao-Xu)" +msgstr "[@Yuxiao-Xu](https://github.com/Yuxiao-Xu)" + +#: ../../source/community/contributors.md +msgid "2025/06/09" +msgstr "2025/06/09" + +#: ../../source/community/contributors.md +msgid "" +"[6b853f1](https://github.com/vllm-project/vllm-" +"ascend/commit/6b853f15fe69ba335d2745ebcf14a164d0bcc505)" +msgstr "" +"[6b853f1](https://github.com/vllm-project/vllm-" +"ascend/commit/6b853f15fe69ba335d2745ebcf14a164d0bcc505)" + +#: ../../source/community/contributors.md +msgid "64" +msgstr "64" + +#: ../../source/community/contributors.md +msgid "[@zxdukki](https://github.com/zxdukki)" +msgstr "[@zxdukki](https://github.com/zxdukki)" + +#: ../../source/community/contributors.md +msgid "2025/06/07" +msgstr "2025/06/07" + +#: ../../source/community/contributors.md +msgid "" +"[87ebaef](https://github.com/vllm-project/vllm-" +"ascend/commit/87ebaef4e4e519988f27a6aa378f614642202ecf)" +msgstr "" +"[87ebaef](https://github.com/vllm-project/vllm-" +"ascend/commit/87ebaef4e4e519988f27a6aa378f614642202ecf)" + +#: ../../source/community/contributors.md +msgid "63" +msgstr "63" + +#: ../../source/community/contributors.md +msgid "" +"[e9ada68](https://github.com/vllm-project/vllm-" +"ascend/commit/e9ada685ece798f9fe0d4a287e3f5246a8a7207b)" +msgstr "" 
+"[e9ada68](https://github.com/vllm-project/vllm-" +"ascend/commit/e9ada685ece798f9fe0d4a287e3f5246a8a7207b)" + +#: ../../source/community/contributors.md +msgid "62" +msgstr "62" + +#: ../../source/community/contributors.md +msgid "[@sdmyzlp](https://github.com/sdmyzlp)" +msgstr "[@sdmyzlp](https://github.com/sdmyzlp)" + +#: ../../source/community/contributors.md +msgid "" +"[3640c60](https://github.com/vllm-project/vllm-" +"ascend/commit/3640c60b0eb4d4cb104e20bfa406d3f1d17920a7)" +msgstr "" +"[3640c60](https://github.com/vllm-project/vllm-" +"ascend/commit/3640c60b0eb4d4cb104e20bfa406d3f1d17920a7)" + +#: ../../source/community/contributors.md +msgid "61" +msgstr "61" + +#: ../../source/community/contributors.md +msgid "[@ChenTaoyu-SJTU](https://github.com/ChenTaoyu-SJTU)" +msgstr "[@ChenTaoyu-SJTU](https://github.com/ChenTaoyu-SJTU)" + +#: ../../source/community/contributors.md +msgid "" +"[20dedba](https://github.com/vllm-project/vllm-" +"ascend/commit/20dedba5d1fc84b7ae8b49f9ce3e3649389e2193)" +msgstr "" +"[20dedba](https://github.com/vllm-project/vllm-" +"ascend/commit/20dedba5d1fc84b7ae8b49f9ce3e3649389e2193)" + +#: ../../source/community/contributors.md +msgid "60" +msgstr "60" + +#: ../../source/community/contributors.md +msgid "[@hahazhky](https://github.com/hahazhky)" +msgstr "[@hahazhky](https://github.com/hahazhky)" + +#: ../../source/community/contributors.md +msgid "2025/06/06" +msgstr "2025/06/06" + +#: ../../source/community/contributors.md +msgid "" +"[0b12c2a](https://github.com/vllm-project/vllm-" +"ascend/commit/0b12c2acf7d9fd192beebebf662298067d9a5435)" +msgstr "" +"[0b12c2a](https://github.com/vllm-project/vllm-" +"ascend/commit/0b12c2acf7d9fd192beebebf662298067d9a5435)" + +#: ../../source/community/contributors.md +msgid "59" +msgstr "59" + +#: ../../source/community/contributors.md +msgid "[@depeng1994](https://github.com/depeng1994)" +msgstr "[@depeng1994](https://github.com/depeng1994)" + +#: ../../source/community/contributors.md +msgid "" +"[6b094a2](https://github.com/vllm-project/vllm-" +"ascend/commit/6b094a2bd49a8a41eb3647568b2d9e5b337db81f)" +msgstr "" +"[6b094a2](https://github.com/vllm-project/vllm-" +"ascend/commit/6b094a2bd49a8a41eb3647568b2d9e5b337db81f)" + +#: ../../source/community/contributors.md +msgid "58" +msgstr "58" + +#: ../../source/community/contributors.md +msgid "[@momo609](https://github.com/momo609)" +msgstr "[@momo609](https://github.com/momo609)" + +#: ../../source/community/contributors.md +msgid "2025/06/05" +msgstr "2025/06/05" + +#: ../../source/community/contributors.md +msgid "" +"[908a851](https://github.com/vllm-project/vllm-" +"ascend/commit/908a851a776cfd9051cc062119e6ec481561c6f7)" +msgstr "" +"[908a851](https://github.com/vllm-project/vllm-" +"ascend/commit/908a851a776cfd9051cc062119e6ec481561c6f7)" + +#: ../../source/community/contributors.md +msgid "57" +msgstr "57" + +#: ../../source/community/contributors.md +msgid "[@David9857](https://github.com/David9857)" +msgstr "[@David9857](https://github.com/David9857)" + +#: ../../source/community/contributors.md +msgid "" +"[78431b3](https://github.com/vllm-project/vllm-" +"ascend/commit/78431b34694dfa3c8f54ed7cc626660318557927)" +msgstr "" +"[78431b3](https://github.com/vllm-project/vllm-" +"ascend/commit/78431b34694dfa3c8f54ed7cc626660318557927)" + +#: ../../source/community/contributors.md +msgid "56" +msgstr "56" + +#: ../../source/community/contributors.md +msgid "[@NINGBENZHE](https://github.com/NINGBENZHE)" +msgstr "[@NINGBENZHE](https://github.com/NINGBENZHE)" + +#: 
../../source/community/contributors.md +msgid "2025/06/03" +msgstr "2025/06/03" + +#: ../../source/community/contributors.md +msgid "" +"[6ec64a3](https://github.com/vllm-project/vllm-" +"ascend/commit/6ec64a3f9686df65b5a23a41aa301e669db19099)" +msgstr "" +"[6ec64a3](https://github.com/vllm-project/vllm-" +"ascend/commit/6ec64a3f9686df65b5a23a41aa301e669db19099)" + +#: ../../source/community/contributors.md +msgid "55" +msgstr "55" + +#: ../../source/community/contributors.md +msgid "[@XWFAlone](https://github.com/XWFAlone)" +msgstr "[@XWFAlone](https://github.com/XWFAlone)" + +#: ../../source/community/contributors.md +msgid "2025/05/30" +msgstr "2025/05/30" + +#: ../../source/community/contributors.md +msgid "" +"[3442fbd](https://github.com/vllm-project/vllm-" +"ascend/commit/3442fbdb235b4c6d72c2bc64a49707a7bd89958e)" +msgstr "" +"[3442fbd](https://github.com/vllm-project/vllm-" +"ascend/commit/3442fbdb235b4c6d72c2bc64a49707a7bd89958e)" + +#: ../../source/community/contributors.md +msgid "54" +msgstr "54" + +#: ../../source/community/contributors.md +msgid "[@YisongJiang](https://github.com/YisongJiang)" +msgstr "[@YisongJiang](https://github.com/YisongJiang)" + +#: ../../source/community/contributors.md +msgid "2025/05/29" +msgstr "2025/05/29" + +#: ../../source/community/contributors.md +msgid "" +"[90afaf6](https://github.com/vllm-project/vllm-" +"ascend/commit/90afaf6306f680307462becf3c78585737579851)" +msgstr "" +"[90afaf6](https://github.com/vllm-project/vllm-" +"ascend/commit/90afaf6306f680307462becf3c78585737579851)" + +#: ../../source/community/contributors.md +msgid "53" +msgstr "53" + +#: ../../source/community/contributors.md +msgid "[@yangpuPKU](https://github.com/yangpuPKU)" +msgstr "[@yangpuPKU](https://github.com/yangpuPKU)" + +#: ../../source/community/contributors.md +msgid "2025/05/23" +msgstr "2025/05/23" + +#: ../../source/community/contributors.md +msgid "" +"[46df67a](https://github.com/vllm-project/vllm-" +"ascend/commit/46df67a5e9ab73fade08cbb2d8c0155cee7316d1)" +msgstr "" +"[46df67a](https://github.com/vllm-project/vllm-" +"ascend/commit/46df67a5e9ab73fade08cbb2d8c0155cee7316d1)" + +#: ../../source/community/contributors.md +msgid "52" +msgstr "52" + +#: ../../source/community/contributors.md +msgid "[@ttanzhiqiang](https://github.com/ttanzhiqiang)" +msgstr "[@ttanzhiqiang](https://github.com/ttanzhiqiang)" + +#: ../../source/community/contributors.md +msgid "" +"[dc6172e](https://github.com/vllm-project/vllm-" +"ascend/commit/dc6172efd3860ce95b40a7b3e93611f875f06d40)" +msgstr "" +"[dc6172e](https://github.com/vllm-project/vllm-" +"ascend/commit/dc6172efd3860ce95b40a7b3e93611f875f06d40)" + +#: ../../source/community/contributors.md +msgid "51" +msgstr "51" + +#: ../../source/community/contributors.md +msgid "[@jiangpeng36](https://github.com/jiangpeng36)" +msgstr "[@jiangpeng36](https://github.com/jiangpeng36)" + +#: ../../source/community/contributors.md +msgid "" +"[df58fb8](https://github.com/vllm-project/vllm-" +"ascend/commit/df58fb80eee24139fc61c495be3ce79cf81b3f73)" +msgstr "" +"[df58fb8](https://github.com/vllm-project/vllm-" +"ascend/commit/df58fb80eee24139fc61c495be3ce79cf81b3f73)" + +#: ../../source/community/contributors.md +msgid "50" +msgstr "50" + +#: ../../source/community/contributors.md +msgid "[@wonderful199082](https://github.com/wonderful199082)" +msgstr "[@wonderful199082](https://github.com/wonderful199082)" + +#: ../../source/community/contributors.md +msgid "2025/05/20" +msgstr "2025/05/20" + +#: ../../source/community/contributors.md 
+msgid "" +"[5cf9ff1](https://github.com/vllm-project/vllm-" +"ascend/commit/5cf9ff18e91b0b7031c258d71a257b8e24689763)" +msgstr "" +"[5cf9ff1](https://github.com/vllm-project/vllm-" +"ascend/commit/5cf9ff18e91b0b7031c258d71a257b8e24689763)" + +#: ../../source/community/contributors.md +msgid "49" +msgstr "49" + +#: ../../source/community/contributors.md +msgid "[@22dimensions](https://github.com/22dimensions)" +msgstr "[@22dimensions](https://github.com/22dimensions)" + +#: ../../source/community/contributors.md +msgid "2025/05/17" +msgstr "2025/05/17" + +#: ../../source/community/contributors.md +msgid "" +"[a8730e7](https://github.com/vllm-project/vllm-" +"ascend/commit/a8730e7a3c4ac6c4b39a5946c943252fdea6cce5)" +msgstr "" +"[a8730e7](https://github.com/vllm-project/vllm-" +"ascend/commit/a8730e7a3c4ac6c4b39a5946c943252fdea6cce5)" + +#: ../../source/community/contributors.md +msgid "48" +msgstr "48" + +#: ../../source/community/contributors.md +msgid "[@cxcxflying](https://github.com/cxcxflying)" +msgstr "[@cxcxflying](https://github.com/cxcxflying)" + +#: ../../source/community/contributors.md +msgid "2025/05/13" +msgstr "2025/05/13" + +#: ../../source/community/contributors.md +msgid "" +"[e564470](https://github.com/vllm-project/vllm-" +"ascend/commit/e56447033889ca95df512208cab22ef832bfdf07)" +msgstr "" +"[e564470](https://github.com/vllm-project/vllm-" +"ascend/commit/e56447033889ca95df512208cab22ef832bfdf07)" + +#: ../../source/community/contributors.md +msgid "47" +msgstr "47" + +#: ../../source/community/contributors.md +msgid "[@NeverRaR](https://github.com/NeverRaR)" +msgstr "[@NeverRaR](https://github.com/NeverRaR)" + +#: ../../source/community/contributors.md +msgid "2025/05/12" +msgstr "2025/05/12" + +#: ../../source/community/contributors.md +msgid "" +"[efabd72](https://github.com/vllm-project/vllm-" +"ascend/commit/efabd722eb757e49aa309c173bbec91ca8c4ced1)" +msgstr "" +"[efabd72](https://github.com/vllm-project/vllm-" +"ascend/commit/efabd722eb757e49aa309c173bbec91ca8c4ced1)" + +#: ../../source/community/contributors.md +msgid "46" +msgstr "46" + +#: ../../source/community/contributors.md +msgid "[@chris668899](https://github.com/chris668899)" +msgstr "[@chris668899](https://github.com/chris668899)" + +#: ../../source/community/contributors.md +msgid "2025/05/08" +msgstr "2025/05/08" + +#: ../../source/community/contributors.md +msgid "" +"[6c02088](https://github.com/vllm-project/vllm-" +"ascend/commit/6c020883a8332b5c519f4f6502733edd9b391c2b)" +msgstr "" +"[6c02088](https://github.com/vllm-project/vllm-" +"ascend/commit/6c020883a8332b5c519f4f6502733edd9b391c2b)" + +#: ../../source/community/contributors.md +msgid "45" +msgstr "45" + +#: ../../source/community/contributors.md +msgid "[@sunbaosong](https://github.com/sunbaosong)" +msgstr "[@sunbaosong](https://github.com/sunbaosong)" + +#: ../../source/community/contributors.md +msgid "2025/05/06" +msgstr "2025/05/06" + +#: ../../source/community/contributors.md +msgid "" +"[d6bfae8](https://github.com/vllm-project/vllm-" +"ascend/commit/d6bfae8eeebedf677b643b712d367a3a69c9cce4)" +msgstr "" +"[d6bfae8](https://github.com/vllm-project/vllm-" +"ascend/commit/d6bfae8eeebedf677b643b712d367a3a69c9cce4)" + +#: ../../source/community/contributors.md +msgid "44" +msgstr "44" + +#: ../../source/community/contributors.md +msgid "2025/04/29" +msgstr "2025/04/29" + +#: ../../source/community/contributors.md +msgid "" +"[87975fa](https://github.com/vllm-project/vllm-" +"ascend/commit/87975fa058fe3f90d204ded42a08989a8dcb413e)" +msgstr 
"" +"[87975fa](https://github.com/vllm-project/vllm-" +"ascend/commit/87975fa058fe3f90d204ded42a08989a8dcb413e)" + +#: ../../source/community/contributors.md +msgid "43" +msgstr "43" + +#: ../../source/community/contributors.md +msgid "[@zouyida2052](https://github.com/zouyida2052)" +msgstr "[@zouyida2052](https://github.com/zouyida2052)" + +#: ../../source/community/contributors.md +msgid "2025/04/28" +msgstr "2025/04/28" + +#: ../../source/community/contributors.md +msgid "" +"[b9528e6](https://github.com/vllm-project/vllm-" +"ascend/commit/b9528e6ecdc417cf444e55a0ce4a2bafdef0ea3b)" +msgstr "" +"[b9528e6](https://github.com/vllm-project/vllm-" +"ascend/commit/b9528e6ecdc417cf444e55a0ce4a2bafdef0ea3b)" + +#: ../../source/community/contributors.md +msgid "42" +msgstr "42" + +#: ../../source/community/contributors.md +msgid "[@ZhengJun9](https://github.com/ZhengJun9)" +msgstr "[@ZhengJun9](https://github.com/ZhengJun9)" + +#: ../../source/community/contributors.md +msgid "" +"[1791113](https://github.com/vllm-project/vllm-" +"ascend/commit/17911138c90d78a76bd691e9dcb56763db35b19f)" +msgstr "" +"[1791113](https://github.com/vllm-project/vllm-" +"ascend/commit/17911138c90d78a76bd691e9dcb56763db35b19f)" + +#: ../../source/community/contributors.md +msgid "41" +msgstr "41" + +#: ../../source/community/contributors.md +msgid "[@linfeng-yuan](https://github.com/linfeng-yuan)" +msgstr "[@linfeng-yuan](https://github.com/linfeng-yuan)" + +#: ../../source/community/contributors.md +msgid "" +"[2204e4d](https://github.com/vllm-project/vllm-" +"ascend/commit/2204e4d08f8e10cf9c30154a14eaa5ca956c2acd)" +msgstr "" +"[2204e4d](https://github.com/vllm-project/vllm-" +"ascend/commit/2204e4d08f8e10cf9c30154a14eaa5ca956c2acd)" + +#: ../../source/community/contributors.md +msgid "40" +msgstr "40" + +#: ../../source/community/contributors.md +msgid "2025/04/27" +msgstr "2025/04/27" + +#: ../../source/community/contributors.md +msgid "" +"[fa4a5d9](https://github.com/vllm-project/vllm-" +"ascend/commit/fa4a5d980e8845a88b9162cf169f0a5ab230f8a5)" +msgstr "" +"[fa4a5d9](https://github.com/vllm-project/vllm-" +"ascend/commit/fa4a5d980e8845a88b9162cf169f0a5ab230f8a5)" + +#: ../../source/community/contributors.md +msgid "39" +msgstr "39" + +#: ../../source/community/contributors.md +msgid "[@RongRongStudio](https://github.com/RongRongStudio)" +msgstr "[@RongRongStudio](https://github.com/RongRongStudio)" + +#: ../../source/community/contributors.md +msgid "2025/04/23" +msgstr "2025/04/23" + +#: ../../source/community/contributors.md +msgid "" +"[848e041](https://github.com/vllm-project/vllm-" +"ascend/commit/848e041a54732c923660dd02daf8e9bf439736a2)" +msgstr "" +"[848e041](https://github.com/vllm-project/vllm-" +"ascend/commit/848e041a54732c923660dd02daf8e9bf439736a2)" + +#: ../../source/community/contributors.md +msgid "38" +msgstr "38" + +#: ../../source/community/contributors.md +msgid "[@fakeYan](https://github.com/fakeYan)" +msgstr "[@fakeYan](https://github.com/fakeYan)" + +#: ../../source/community/contributors.md +msgid "" +"[05bdcbe](https://github.com/vllm-project/vllm-" +"ascend/commit/05bdcbeae47c7fcb9b1c30cad059abf1d40b5421)" +msgstr "" +"[05bdcbe](https://github.com/vllm-project/vllm-" +"ascend/commit/05bdcbeae47c7fcb9b1c30cad059abf1d40b5421)" + +#: ../../source/community/contributors.md +msgid "37" +msgstr "37" + +#: ../../source/community/contributors.md +msgid "2025/04/17" +msgstr "2025/04/17" + +#: ../../source/community/contributors.md +msgid "" +"[697908f](https://github.com/vllm-project/vllm-" 
+"ascend/commit/697908f5cd7c65a3a917ec1a962b0886efc98c7e)" +msgstr "" +"[697908f](https://github.com/vllm-project/vllm-" +"ascend/commit/697908f5cd7c65a3a917ec1a962b0886efc98c7e)" + +#: ../../source/community/contributors.md +msgid "36" +msgstr "36" + +#: ../../source/community/contributors.md +msgid "[@heartStrive1998](https://github.com/heartStrive1998)" +msgstr "[@heartStrive1998](https://github.com/heartStrive1998)" + +#: ../../source/community/contributors.md +msgid "2025/04/16" +msgstr "2025/04/16" + +#: ../../source/community/contributors.md +msgid "" +"[2f15503](https://github.com/vllm-project/vllm-" +"ascend/commit/2f155039dc3997640854daef469bbf0cb77dc6ed)" +msgstr "" +"[2f15503](https://github.com/vllm-project/vllm-" +"ascend/commit/2f155039dc3997640854daef469bbf0cb77dc6ed)" + +#: ../../source/community/contributors.md +msgid "35" +msgstr "35" + +#: ../../source/community/contributors.md +msgid "[@eeethenQ](https://github.com/eeethenQ)" +msgstr "[@eeethenQ](https://github.com/eeethenQ)" + +#: ../../source/community/contributors.md +msgid "2025/04/15" +msgstr "2025/04/15" + +#: ../../source/community/contributors.md +msgid "" +"[44a8301](https://github.com/vllm-project/vllm-" +"ascend/commit/44a8301424ded94dae83e13b837f5bfc0a1bfc15)" +msgstr "" +"[44a8301](https://github.com/vllm-project/vllm-" +"ascend/commit/44a8301424ded94dae83e13b837f5bfc0a1bfc15)" + +#: ../../source/community/contributors.md +msgid "34" +msgstr "34" + +#: ../../source/community/contributors.md +msgid "[@wxsIcey](https://github.com/wxsIcey)" +msgstr "[@wxsIcey](https://github.com/wxsIcey)" + +#: ../../source/community/contributors.md +msgid "2025/04/10" +msgstr "2025/04/10" + +#: ../../source/community/contributors.md +msgid "" +"[d05ea17](https://github.com/vllm-project/vllm-" +"ascend/commit/d05ea17427b82a506b97409a7de8359f18f565f7)" +msgstr "" +"[d05ea17](https://github.com/vllm-project/vllm-" +"ascend/commit/d05ea17427b82a506b97409a7de8359f18f565f7)" + +#: ../../source/community/contributors.md +msgid "33" +msgstr "33" + +#: ../../source/community/contributors.md +msgid "[@yx0716](https://github.com/yx0716)" +msgstr "[@yx0716](https://github.com/yx0716)" + +#: ../../source/community/contributors.md +msgid "2025/04/08" +msgstr "2025/04/08" + +#: ../../source/community/contributors.md +msgid "" +"[5d62393](https://github.com/vllm-project/vllm-" +"ascend/commit/5d6239306be9b0f5ac6dbaa137048c372a92ff20)" +msgstr "" +"[5d62393](https://github.com/vllm-project/vllm-" +"ascend/commit/5d6239306be9b0f5ac6dbaa137048c372a92ff20)" + +#: ../../source/community/contributors.md +msgid "32" +msgstr "32" + +#: ../../source/community/contributors.md +msgid "[@antonlisq](https://github.com/antonlisq)" +msgstr "[@antonlisq](https://github.com/antonlisq)" + +#: ../../source/community/contributors.md +msgid "2025/04/07" +msgstr "2025/04/07" + +#: ../../source/community/contributors.md +msgid "" +"[2b765dc](https://github.com/vllm-project/vllm-" +"ascend/commit/2b765dcc4974b1bafc26ff5da817ce7e652f0eb0)" +msgstr "" +"[2b765dc](https://github.com/vllm-project/vllm-" +"ascend/commit/2b765dcc4974b1bafc26ff5da817ce7e652f0eb0)" + +#: ../../source/community/contributors.md +msgid "31" +msgstr "31" + +#: ../../source/community/contributors.md +msgid "2025/03/28" +msgstr "2025/03/28" + +#: ../../source/community/contributors.md +msgid "" +"[12390af](https://github.com/vllm-project/vllm-" +"ascend/commit/12390af075962456ecc8233d8dcce7064b75f390)" +msgstr "" +"[12390af](https://github.com/vllm-project/vllm-" 
+"ascend/commit/12390af075962456ecc8233d8dcce7064b75f390)" + +#: ../../source/community/contributors.md +msgid "30" +msgstr "30" + +#: ../../source/community/contributors.md +msgid "[@wuhuikx](https://github.com/wuhuikx)" +msgstr "[@wuhuikx](https://github.com/wuhuikx)" + +#: ../../source/community/contributors.md +msgid "" +"[57a84bb](https://github.com/vllm-project/vllm-" +"ascend/commit/57a84bb7befeaa0dc62aa35fa406e4d6affbfcca)" +msgstr "" +"[57a84bb](https://github.com/vllm-project/vllm-" +"ascend/commit/57a84bb7befeaa0dc62aa35fa406e4d6affbfcca)" + +#: ../../source/community/contributors.md +msgid "29" +msgstr "29" + +#: ../../source/community/contributors.md +msgid "[@ZhengZhenyu](https://github.com/ZhengZhenyu)" +msgstr "[@ZhengZhenyu](https://github.com/ZhengZhenyu)" + +#: ../../source/community/contributors.md +msgid "2025/03/26" +msgstr "2025/03/26" + +#: ../../source/community/contributors.md +msgid "" +"[0b5a964](https://github.com/vllm-project/vllm-" +"ascend/commit/0b5a9643fd6c3240d7ede669e37209d7ff433841)" +msgstr "" +"[0b5a964](https://github.com/vllm-project/vllm-" +"ascend/commit/0b5a9643fd6c3240d7ede669e37209d7ff433841)" + +#: ../../source/community/contributors.md +msgid "28" +msgstr "28" + +#: ../../source/community/contributors.md +msgid "[@baifanxxx](https://github.com/baifanxxx)" +msgstr "[@baifanxxx](https://github.com/baifanxxx)" + +#: ../../source/community/contributors.md +msgid "" +"[1225052](https://github.com/vllm-project/vllm-" +"ascend/commit/122505208ff6284f409846ca7294f4a4b9883285)" +msgstr "" +"[1225052](https://github.com/vllm-project/vllm-" +"ascend/commit/122505208ff6284f409846ca7294f4a4b9883285)" + +#: ../../source/community/contributors.md +msgid "27" +msgstr "27" + +#: ../../source/community/contributors.md +msgid "[@zhangxinyuehfad](https://github.com/zhangxinyuehfad)" +msgstr "[@zhangxinyuehfad](https://github.com/zhangxinyuehfad)" + +#: ../../source/community/contributors.md +msgid "2025/03/21" +msgstr "2025/03/21" + +#: ../../source/community/contributors.md +msgid "" +"[608ea3d](https://github.com/vllm-project/vllm-" +"ascend/commit/608ea3d679b3b7a8dfff878ff78599b74f09606d)" +msgstr "" +"[608ea3d](https://github.com/vllm-project/vllm-" +"ascend/commit/608ea3d679b3b7a8dfff878ff78599b74f09606d)" + +#: ../../source/community/contributors.md +msgid "26" +msgstr "26" + +#: ../../source/community/contributors.md +msgid "[@cllouud](https://github.com/cllouud)" +msgstr "[@cllouud](https://github.com/cllouud)" + +#: ../../source/community/contributors.md +msgid "2025/03/18" +msgstr "2025/03/18" + +#: ../../source/community/contributors.md +msgid "" +"[d89d28d](https://github.com/vllm-project/vllm-" +"ascend/commit/d89d28decaf5443051fc76ad2c9f66a2f56d4b9a)" +msgstr "" +"[d89d28d](https://github.com/vllm-project/vllm-" +"ascend/commit/d89d28decaf5443051fc76ad2c9f66a2f56d4b9a)" + +#: ../../source/community/contributors.md +msgid "25" +msgstr "25" + +#: ../../source/community/contributors.md +msgid "[@rjg-lyh](https://github.com/rjg-lyh)" +msgstr "[@rjg-lyh](https://github.com/rjg-lyh)" + +#: ../../source/community/contributors.md +msgid "2025/03/13" +msgstr "2025/03/13" + +#: ../../source/community/contributors.md +msgid "" +"[6512470](https://github.com/vllm-project/vllm-" +"ascend/commit/65124705fb39d4cc2c94c80254421e067a82fe50)" +msgstr "" +"[6512470](https://github.com/vllm-project/vllm-" +"ascend/commit/65124705fb39d4cc2c94c80254421e067a82fe50)" + +#: ../../source/community/contributors.md +msgid "24" +msgstr "24" + +#: 
../../source/community/contributors.md +msgid "[@xiemingda-1002](https://github.com/xiemingda-1002)" +msgstr "[@xiemingda-1002](https://github.com/xiemingda-1002)" + +#: ../../source/community/contributors.md +msgid "2025/03/12" +msgstr "2025/03/12" + +#: ../../source/community/contributors.md +msgid "" +"[59ea23d](https://github.com/vllm-project/vllm-" +"ascend/commit/59ea23d0d394879d7f33de6fd22242539b9c3cc5)" +msgstr "" +"[59ea23d](https://github.com/vllm-project/vllm-" +"ascend/commit/59ea23d0d394879d7f33de6fd22242539b9c3cc5)" + +#: ../../source/community/contributors.md +msgid "23" +msgstr "23" + +#: ../../source/community/contributors.md +msgid "2025/03/11" +msgstr "2025/03/11" + +#: ../../source/community/contributors.md +msgid "" +"[0db6670](https://github.com/vllm-project/vllm-" +"ascend/commit/0db6670bfab8cb1d84c9e7270df0a1d42d6ce7ca)" +msgstr "" +"[0db6670](https://github.com/vllm-project/vllm-" +"ascend/commit/0db6670bfab8cb1d84c9e7270df0a1d42d6ce7ca)" + +#: ../../source/community/contributors.md +msgid "22" +msgstr "22" + +#: ../../source/community/contributors.md +msgid "[@new-TonyWang](https://github.com/new-TonyWang)" +msgstr "[@new-TonyWang](https://github.com/new-TonyWang)" + +#: ../../source/community/contributors.md +msgid "" +"[dfb4e23](https://github.com/vllm-project/vllm-" +"ascend/commit/dfb4e23e9d820ac992a071c123bbe983c7b01b2e)" +msgstr "" +"[dfb4e23](https://github.com/vllm-project/vllm-" +"ascend/commit/dfb4e23e9d820ac992a071c123bbe983c7b01b2e)" + +#: ../../source/community/contributors.md +msgid "21" +msgstr "21" + +#: ../../source/community/contributors.md +msgid "[@mengwei805](https://github.com/mengwei805)" +msgstr "[@mengwei805](https://github.com/mengwei805)" + +#: ../../source/community/contributors.md +msgid "2025/03/06" +msgstr "2025/03/06" + +#: ../../source/community/contributors.md +msgid "" +"[8fcf3d1](https://github.com/vllm-project/vllm-" +"ascend/commit/8fcf3d1704084626db35c5dc82ade446508598d4)" +msgstr "" +"[8fcf3d1](https://github.com/vllm-project/vllm-" +"ascend/commit/8fcf3d1704084626db35c5dc82ade446508598d4)" + +#: ../../source/community/contributors.md +msgid "20" +msgstr "20" + +#: ../../source/community/contributors.md +msgid "[@baymax591](https://github.com/baymax591)" +msgstr "[@baymax591](https://github.com/baymax591)" + +#: ../../source/community/contributors.md +msgid "2025/02/28" +msgstr "2025/02/28" + +#: ../../source/community/contributors.md +msgid "" +"[e8131b9](https://github.com/vllm-project/vllm-" +"ascend/commit/e8131b99cf199f50a304e6e6fb125a1b95bcc92b)" +msgstr "" +"[e8131b9](https://github.com/vllm-project/vllm-" +"ascend/commit/e8131b99cf199f50a304e6e6fb125a1b95bcc92b)" + +#: ../../source/community/contributors.md +msgid "19" +msgstr "19" + +#: ../../source/community/contributors.md +msgid "[@wwfu109](https://github.com/wwfu109)" +msgstr "[@wwfu109](https://github.com/wwfu109)" + +#: ../../source/community/contributors.md +msgid "2025/02/27" +msgstr "2025/02/27" + +#: ../../source/community/contributors.md +msgid "" +"[b074047](https://github.com/vllm-project/vllm-" +"ascend/commit/b07404766bdaf6e3cebc5cb0aba89a247501302e)" +msgstr "" +"[b074047](https://github.com/vllm-project/vllm-" +"ascend/commit/b07404766bdaf6e3cebc5cb0aba89a247501302e)" + +#: ../../source/community/contributors.md +msgid "18" +msgstr "18" + +#: ../../source/community/contributors.md +msgid "[@shink](https://github.com/shink)" +msgstr "[@shink](https://github.com/shink)" + +#: ../../source/community/contributors.md +msgid "" 
+"[6aed833](https://github.com/vllm-project/vllm-" +"ascend/commit/6aed83335cbe92fd0b8ef07c28966a753d012ccb)" +msgstr "" +"[6aed833](https://github.com/vllm-project/vllm-" +"ascend/commit/6aed83335cbe92fd0b8ef07c28966a753d012ccb)" + +#: ../../source/community/contributors.md +msgid "17" +msgstr "17" + +#: ../../source/community/contributors.md +msgid "[@dependabot[bot]](https://github.com/dependabot[bot])" +msgstr "[@dependabot[bot]](https://github.com/dependabot[bot])" + +#: ../../source/community/contributors.md +msgid "" +"[a5564ed](https://github.com/vllm-project/vllm-" +"ascend/commit/a5564ed5d8fd9818936a22d9ea35951a27513b4c)" +msgstr "" +"[a5564ed](https://github.com/vllm-project/vllm-" +"ascend/commit/a5564ed5d8fd9818936a22d9ea35951a27513b4c)" + +#: ../../source/community/contributors.md +msgid "16" +msgstr "16" + +#: ../../source/community/contributors.md +msgid "[@kunpengW-code](https://github.com/kunpengW-code)" +msgstr "[@kunpengW-code](https://github.com/kunpengW-code)" + +#: ../../source/community/contributors.md +msgid "2025/02/26" +msgstr "2025/02/26" + +#: ../../source/community/contributors.md +msgid "" +"[ca807ce](https://github.com/vllm-project/vllm-" +"ascend/commit/ca807ce49ed64aa89242f5ae29b9862a77648b45)" +msgstr "" +"[ca807ce](https://github.com/vllm-project/vllm-" +"ascend/commit/ca807ce49ed64aa89242f5ae29b9862a77648b45)" + +#: ../../source/community/contributors.md +msgid "15" +msgstr "15" + +#: ../../source/community/contributors.md +msgid "[@Yaphets24](https://github.com/Yaphets24)" +msgstr "[@Yaphets24](https://github.com/Yaphets24)" + +#: ../../source/community/contributors.md +msgid "2025/02/22" +msgstr "2025/02/22" + +#: ../../source/community/contributors.md +msgid "" +"[d0b3cb4](https://github.com/vllm-project/vllm-" +"ascend/commit/d0b3cb4fa79d5fc7f8245a3c68885ce1fa030ba4)" +msgstr "" +"[d0b3cb4](https://github.com/vllm-project/vllm-" +"ascend/commit/d0b3cb4fa79d5fc7f8245a3c68885ce1fa030ba4)" + +#: ../../source/community/contributors.md +msgid "14" +msgstr "14" + +#: ../../source/community/contributors.md +msgid "[@noemotiovon](https://github.com/noemotiovon)" +msgstr "[@noemotiovon](https://github.com/noemotiovon)" + +#: ../../source/community/contributors.md +msgid "2025/02/21" +msgstr "2025/02/21" + +#: ../../source/community/contributors.md +msgid "" +"[202b39a](https://github.com/vllm-project/vllm-" +"ascend/commit/202b39a38c2869b0ecc3df486550fb555a2eb0c0)" +msgstr "" +"[202b39a](https://github.com/vllm-project/vllm-" +"ascend/commit/202b39a38c2869b0ecc3df486550fb555a2eb0c0)" + +#: ../../source/community/contributors.md +msgid "13" +msgstr "13" + +#: ../../source/community/contributors.md +msgid "[@SidaoY](https://github.com/SidaoY)" +msgstr "[@SidaoY](https://github.com/SidaoY)" + +#: ../../source/community/contributors.md +msgid "2025/02/18" +msgstr "2025/02/18" + +#: ../../source/community/contributors.md +msgid "" +"[718c763](https://github.com/vllm-project/vllm-" +"ascend/commit/718c7638555d12cd43ea2a9e497e185778b68595)" +msgstr "" +"[718c763](https://github.com/vllm-project/vllm-" +"ascend/commit/718c7638555d12cd43ea2a9e497e185778b68595)" + +#: ../../source/community/contributors.md +msgid "12" +msgstr "12" + +#: ../../source/community/contributors.md +msgid "[@ShiyaNiu](https://github.com/ShiyaNiu)" +msgstr "[@ShiyaNiu](https://github.com/ShiyaNiu)" + +#: ../../source/community/contributors.md +msgid "2025/02/17" +msgstr "2025/02/17" + +#: ../../source/community/contributors.md +msgid "" +"[36ea38f](https://github.com/vllm-project/vllm-" 
+"ascend/commit/36ea38fde56437ff1745bd95cd8d9e02a6578d38)" +msgstr "" +"[36ea38f](https://github.com/vllm-project/vllm-" +"ascend/commit/36ea38fde56437ff1745bd95cd8d9e02a6578d38)" + +#: ../../source/community/contributors.md +msgid "11" +msgstr "11" + +#: ../../source/community/contributors.md +msgid "[@ji-huazhong](https://github.com/ji-huazhong)" +msgstr "[@ji-huazhong](https://github.com/ji-huazhong)" + +#: ../../source/community/contributors.md +msgid "2025/02/12" +msgstr "2025/02/12" + +#: ../../source/community/contributors.md +msgid "" +"[c8b57d1](https://github.com/vllm-project/vllm-" +"ascend/commit/c8b57d10b24efcd9b4fadeb66cfbf66aa3dd5f82)" +msgstr "" +"[c8b57d1](https://github.com/vllm-project/vllm-" +"ascend/commit/c8b57d10b24efcd9b4fadeb66cfbf66aa3dd5f82)" + +#: ../../source/community/contributors.md +msgid "10" +msgstr "10" + +#: ../../source/community/contributors.md +msgid "[@Angazenn](https://github.com/Angazenn)" +msgstr "[@Angazenn](https://github.com/Angazenn)" + +#: ../../source/community/contributors.md +msgid "2025/02/11" +msgstr "2025/02/11" + +#: ../../source/community/contributors.md +msgid "" +"[7637759](https://github.com/vllm-project/vllm-" +"ascend/commit/7637759056028839c74960d9cfd3ce6275ee5d35)" +msgstr "" +"[7637759](https://github.com/vllm-project/vllm-" +"ascend/commit/7637759056028839c74960d9cfd3ce6275ee5d35)" + +#: ../../source/community/contributors.md +msgid "9" +msgstr "9" + +#: ../../source/community/contributors.md +msgid "2025/02/08" +msgstr "2025/02/08" + +#: ../../source/community/contributors.md +msgid "" +"[49e5baf](https://github.com/vllm-project/vllm-" +"ascend/commit/49e5baf8596bdf6239b01b4124e9a105d0e6b203)" +msgstr "" +"[49e5baf](https://github.com/vllm-project/vllm-" +"ascend/commit/49e5baf8596bdf6239b01b4124e9a105d0e6b203)" + +#: ../../source/community/contributors.md +msgid "8" +msgstr "8" + +#: ../../source/community/contributors.md +msgid "[@zouyida2002](https://github.com/zouyida2002)" +msgstr "[@zouyida2002](https://github.com/zouyida2002)" + +#: ../../source/community/contributors.md +msgid "2025/02/07" +msgstr "2025/02/07" + +#: ../../source/community/contributors.md +msgid "" +"[4495fc6](https://github.com/vllm-project/vllm-" +"ascend/commit/4495fc68389e3fb1ef14534c202948931e38446b)" +msgstr "" +"[4495fc6](https://github.com/vllm-project/vllm-" +"ascend/commit/4495fc68389e3fb1ef14534c202948931e38446b)" + +#: ../../source/community/contributors.md +msgid "7" +msgstr "7" + +#: ../../source/community/contributors.md +msgid "" +"[8fc5dc9](https://github.com/vllm-project/vllm-" +"ascend/commit/8fc5dc966aaf4e174d1ec0d1902c40289411ec0e)" +msgstr "" +"[8fc5dc9](https://github.com/vllm-project/vllm-" +"ascend/commit/8fc5dc966aaf4e174d1ec0d1902c40289411ec0e)" + +#: ../../source/community/contributors.md +msgid "6" +msgstr "6" + +#: ../../source/community/contributors.md +msgid "2025/02/06" +msgstr "2025/02/06" + +#: ../../source/community/contributors.md +msgid "" +"[a48b9ad](https://github.com/vllm-project/vllm-" +"ascend/commit/a48b9addefd292af523644411d4ff4142dd4bc66)" +msgstr "" +"[a48b9ad](https://github.com/vllm-project/vllm-" +"ascend/commit/a48b9addefd292af523644411d4ff4142dd4bc66)" + +#: ../../source/community/contributors.md msgid "5" msgstr "5" -#: ../../community/contributors.md -msgid "[@Potabk](https://github.com/Potabk)" -msgstr "[@Potabk](https://github.com/Potabk)" - -#: ../../community/contributors.md -msgid "" -"[8cb5615](https://github.com/vllm-project/vllm-" -"ascend/commit/8cb5615fb010b34c2f4f89e03e6257bfee851f86)" 
-msgstr "" -"[8cb5615](https://github.com/vllm-project/vllm-" -"ascend/commit/8cb5615fb010b34c2f4f89e03e6257bfee851f86)" - -#: ../../community/contributors.md -msgid "4" -msgstr "4" - -#: ../../community/contributors.md -msgid "" -"[a48b9ad](https://github.com/vllm-project/vllm-" -"ascend/commit/a48b9addefd292af523644411d4ff4142dd4bc66)" -msgstr "" -"[a48b9ad](https://github.com/vllm-project/vllm-" -"ascend/commit/a48b9addefd292af523644411d4ff4142dd4bc66)" - -#: ../../community/contributors.md -msgid "3" -msgstr "3" - -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "[@shen-shanshan](https://github.com/shen-shanshan)" msgstr "[@shen-shanshan](https://github.com/shen-shanshan)" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "" "[bfccf73](https://github.com/vllm-project/vllm-" "ascend/commit/bfccf739e2fe121b54d9b198c2ec205a9379190e)" @@ -1610,15 +6389,43 @@ msgstr "" "[bfccf73](https://github.com/vllm-project/vllm-" "ascend/commit/bfccf739e2fe121b54d9b198c2ec205a9379190e)" -#: ../../community/contributors.md +#: ../../source/community/contributors.md +msgid "4" +msgstr "4" + +#: ../../source/community/contributors.md +msgid "[@Potabk](https://github.com/Potabk)" +msgstr "[@Potabk](https://github.com/Potabk)" + +#: ../../source/community/contributors.md +msgid "" +"[8cb5615](https://github.com/vllm-project/vllm-" +"ascend/commit/8cb5615fb010b34c2f4f89e03e6257bfee851f86)" +msgstr "" +"[8cb5615](https://github.com/vllm-project/vllm-" +"ascend/commit/8cb5615fb010b34c2f4f89e03e6257bfee851f86)" + +#: ../../source/community/contributors.md +msgid "3" +msgstr "3" + +#: ../../source/community/contributors.md +msgid "" +"[7d9ae22](https://github.com/vllm-project/vllm-" +"ascend/commit/7d9ae22ecb6dc3ea4e720e5109cf46e1ae7da730)" +msgstr "" +"[7d9ae22](https://github.com/vllm-project/vllm-" +"ascend/commit/7d9ae22ecb6dc3ea4e720e5109cf46e1ae7da730)" + +#: ../../source/community/contributors.md msgid "2" msgstr "2" -#: ../../community/contributors.md -msgid "2025/2/5" -msgstr "2025/2/5" +#: ../../source/community/contributors.md +msgid "2025/02/05" +msgstr "2025/02/05" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "" "[d5e7756](https://github.com/vllm-project/vllm-" "ascend/commit/d5e7756028bd5884ade96b654555c375770a2f64)" @@ -1626,19 +6433,19 @@ msgstr "" "[d5e7756](https://github.com/vllm-project/vllm-" "ascend/commit/d5e7756028bd5884ade96b654555c375770a2f64)" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "1" msgstr "1" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "[@simon-mo](https://github.com/simon-mo)" msgstr "[@simon-mo](https://github.com/simon-mo)" -#: ../../community/contributors.md -msgid "2025/1/29" -msgstr "2025/1/29" +#: ../../source/community/contributors.md +msgid "2025/01/29" +msgstr "2025/01/29" -#: ../../community/contributors.md +#: ../../source/community/contributors.md msgid "" "[eb28342](https://github.com/vllm-project/vllm-" "ascend/commit/eb283428ddc17207b6866118f9bc15454b5b8801)" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po b/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po index 030aa24b..f0382a9d 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po @@ -4,201 +4,197 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../community/governance.md:1 +#: ../../source/community/governance.md:1 msgid "Governance" msgstr "治理" -#: ../../community/governance.md:3 +#: ../../source/community/governance.md:3 msgid "Mission" msgstr "使命" -#: ../../community/governance.md:4 +#: ../../source/community/governance.md:5 msgid "" "As a vital component of vLLM, the vLLM Ascend project is dedicated to " -"providing an easy, fast, and cheap LLM Serving for Everyone on Ascend NPU, " -"and to actively contribute to the enrichment of vLLM." +"providing an easy, fast, and cheap LLM Serving for everyone on Ascend " +"NPUs and to actively contributing to the enrichment of vLLM." msgstr "" -"作为 vLLM 的重要组成部分,vLLM Ascend 项目致力于为所有人在 Ascend NPU 上提供简单、快速且低成本的大语言模型服务,并积极促进" -" vLLM 的丰富发展。" +"作为 vLLM 的重要组成部分,vLLM Ascend 项目致力于为所有人在昇腾 NPU " +"上提供简单、快速且低成本的大语言模型服务,并积极为丰富 vLLM 生态系统做出贡献。" -#: ../../community/governance.md:6 +#: ../../source/community/governance.md:7 msgid "Principles" msgstr "原则" -#: ../../community/governance.md:7 +#: ../../source/community/governance.md:9 msgid "" -"vLLM Ascend follows the vLLM community's code of conduct:[vLLM - CODE OF " -"CONDUCT](https://github.com/vllm-project/vllm/blob/main/CODE_OF_CONDUCT.md)" +"vLLM Ascend follows the vLLM community's code of conduct: [vLLM - CODE OF" +" CONDUCT](https://github.com/vllm-" +"project/vllm/blob/main/CODE_OF_CONDUCT.md)" msgstr "" "vLLM Ascend 遵循 vLLM 社区的行为准则:[vLLM - 行为准则](https://github.com/vllm-" "project/vllm/blob/main/CODE_OF_CONDUCT.md)" -#: ../../community/governance.md:9 +#: ../../source/community/governance.md:11 msgid "Governance - Mechanics" msgstr "治理 - 机制" -#: ../../community/governance.md:10 +#: ../../source/community/governance.md:13 msgid "" -"vLLM Ascend is an open-source project under the vLLM community, where the " -"authority to appoint roles is ultimately determined by the vLLM community. " -"It adopts a hierarchical technical governance structure." +"vLLM Ascend is an open-source project under the vLLM community, where the" +" authority to appoint roles is ultimately determined by the vLLM " +"community. It adopts a hierarchical technical governance structure." msgstr "vLLM Ascend 是 vLLM 社区下的一个开源项目,其角色任命权最终由 vLLM 社区决定。它采用分层的技术治理结构。" -#: ../../community/governance.md:12 +#: ../../source/community/governance.md:15 msgid "Contributor:" msgstr "贡献者:" -#: ../../community/governance.md:14 +#: ../../source/community/governance.md:17 msgid "" -"**Responsibility:** Help new contributors on boarding, handle and respond to" -" community questions, review RFCs, code" -msgstr "**职责:** 帮助新贡献者加入,处理和回复社区问题,审查RFC和代码" +"**Responsibility:** Help new contributors onboarding, handle and respond " +"to community questions, review RFCs and code." +msgstr "**职责:** 帮助新贡献者加入,处理和回复社区问题,审查 RFC 和代码。" -#: ../../community/governance.md:16 +#: ../../source/community/governance.md:19 msgid "" -"**Requirements:** Complete at least 1 contribution. 
Contributor is someone " -"who consistently and actively participates in a project, included but not " -"limited to issue/review/commits/community involvement." -msgstr "**要求:** 完成至少1次贡献。贡献者是指持续且积极参与项目的人,包括但不限于问题、评审、提交和社区参与。" +"**Requirements:** Complete at least 1 contribution. A contributor is " +"someone who consistently and actively participates in a project, " +"including but not limited to issue/review/commits/community involvement." +msgstr "**要求:** 完成至少 1 次贡献。贡献者是指持续且积极参与项目的人,包括但不限于提交问题、进行评审、提交代码和参与社区活动。" -#: ../../community/governance.md:18 +#: ../../source/community/governance.md:21 msgid "" -"Contributors will be empowered [vllm-project/vllm-" -"ascend](https://github.com/vllm-project/vllm-ascend) Github repo `Triage` " -"permissions (`Can read and clone this repository. Can also manage issues and" -" pull requests`) to help community developers collaborate more efficiently." +"The contributor permissions are granted by the [vllm-project/vllm-" +"ascend](https://github.com/vllm-project/vllm-ascend)'s repo `Triage` on " +"GitHub, including repo read and clone, issue and PR management, " +"facilitating efficient collaboration between community developers." msgstr "" -"贡献者将被赋予 [vllm-project/vllm-ascend](https://github.com/vllm-project/vllm-" -"ascend) Github 仓库的 `Triage` 权限(`可读取和克隆此仓库。还可以管理问题和拉取请求`),以帮助社区开发者更加高效地协作。" +"贡献者将被授予 [vllm-project/vllm-ascend](https://github.com/vllm-project/vllm-" +"ascend) GitHub 仓库的 `Triage` 权限(包括仓库读取和克隆、问题和拉取请求管理),以促进社区开发者之间的高效协作。" -#: ../../community/governance.md:20 +#: ../../source/community/governance.md:23 msgid "Maintainer:" msgstr "维护者:" -#: ../../community/governance.md:22 +#: ../../source/community/governance.md:25 msgid "" -"**Responsibility:** Develop the project's vision and mission. Maintainers " -"are responsible for driving the technical direction of the entire project " -"and ensuring its overall success, possessing code merge permissions. They " -"formulate the roadmap, review contributions from community members, " -"continuously contribute code, and actively engage in community activities " -"(such as regular meetings/events)." +"**Responsibility:** Develop the project's vision and mission. Maintainers" +" are responsible for shaping the technical direction of the project and " +"ensuring its long-term success. With code merge permissions, they lead " +"roadmap planning, review community contributions, make ongoing code " +"improvements, and actively participate in community engagement—such as " +"regular meetings and events." msgstr "" -"**责任:** " -"制定项目的愿景和使命。维护者负责引领整个项目的技术方向并确保其整体成功,拥有代码合并权限。他们制定路线图,审核社区成员的贡献,持续贡献代码,并积极参与社区活动(如定期会议/活动)。" +"**职责:** " +"制定项目的愿景和使命。维护者负责引领项目的技术方向并确保其长期成功,拥有代码合并权限。他们制定路线图,审核社区贡献,持续改进代码,并积极参与社区活动(如定期会议和活动)。" -#: ../../community/governance.md:24 +#: ../../source/community/governance.md:27 msgid "" -"**Requirements:** Deep understanding of ‌vLLM‌ and ‌vLLM Ascend‌ codebases, " -"with a commitment to sustained code contributions. Competency in " -"‌design/development/PR review workflows‌." -msgstr "" -"**要求:** 深入理解 ‌vLLM‌ 和 ‌vLLM Ascend‌ 代码库,并承诺持续贡献代码。具备 ‌设计/开发/PR 审核流程‌ 的能力。" +"**Requirements:** Deep understanding of ‌vLLM‌ and ‌vLLM Ascend‌ code " +"bases, with a commitment to sustained code contributions and competency " +"in ‌design, development, and PR review workflows‌." 
+msgstr "**要求:** 深入理解 ‌vLLM‌ 和 ‌vLLM Ascend‌ 代码库,承诺持续贡献代码,并具备 ‌设计、开发和 PR 审核工作流‌ 的能力。" -#: ../../community/governance.md:25 +#: ../../source/community/governance.md:29 msgid "" -"**Review Quality‌:** Actively participate in community code reviews, " +"**Review quality‌:** Actively participate in community code reviews, " "ensuring high-quality code integration." msgstr "**评审质量:** 积极参与社区代码评审,确保高质量的代码集成。" -#: ../../community/governance.md:26 +#: ../../source/community/governance.md:30 msgid "" -"**Quality Contribution‌:** Successfully develop and deliver at least one " +"**Quality contribution‌:** Successfully develop and deliver at least one " "major feature while maintaining consistent high-quality contributions." -msgstr "**质量贡献‌:** 成功开发并交付至少一个主要功能,同时持续保持高质量的贡献。" +msgstr "**质量贡献:** 成功开发并交付至少一个主要功能,同时保持持续的高质量贡献。" -#: ../../community/governance.md:27 +#: ../../source/community/governance.md:31 msgid "" -"**Community Involvement‌:** Actively address issues, respond to forum " -"inquiries, participate in discussions, and engage in community-driven tasks." -msgstr "**社区参与:** 积极解决问题,回复论坛询问,参与讨论,并参与社区驱动的任务。" +"**Community involvement‌:** Actively address issues, respond to forum " +"inquiries, participate in discussions, and engage in community-driven " +"tasks." +msgstr "**社区参与:** 积极解决问题,回复论坛询问,参与讨论,并投身于社区驱动的任务。" -#: ../../community/governance.md:29 +#: ../../source/community/governance.md:33 msgid "" -"Requires approval from existing Maintainers. The vLLM community has the " -"final decision-making authority." -msgstr "需要现有维护者的批准。vLLM社区拥有最终决策权。" - -#: ../../community/governance.md:31 -msgid "" -"Maintainer will be empowered [vllm-project/vllm-" -"ascend](https://github.com/vllm-project/vllm-ascend) Github repo write " -"permissions (`Can read, clone, and push to this repository. Can also manage " -"issues and pull requests`)." +"The approval from existing Maintainers is required. The vLLM community " +"has the final decision-making authority. Maintainers will be granted " +"write access to the [vllm-project/vllm-ascend](https://github.com/vllm-" +"project/vllm-ascend) GitHub repo. This includes permission to read, " +"clone, and push to the repository, as well as manage issues and pull " +"requests." msgstr "" -"维护者将被授予 [vllm-project/vllm-ascend](https://github.com/vllm-project/vllm-" -"ascend) Github 仓库的写入权限(`可以读取、克隆和推送到此仓库。还可以管理问题和拉取请求`)。" +"需要获得现有维护者的批准。vLLM 社区拥有最终决策权。维护者将被授予对 [vllm-project/vllm-" +"ascend](https://github.com/vllm-project/vllm-ascend) GitHub 仓库的写入权限。这包括读取、克隆和推送仓库的权限,以及管理问题和拉取请求的权限。" -#: ../../community/governance.md:33 +#: ../../source/community/governance.md:36 msgid "Nominating and Removing Maintainers" msgstr "提名和移除维护者" -#: ../../community/governance.md:35 +#: ../../source/community/governance.md:38 msgid "The Principles" msgstr "原则" -#: ../../community/governance.md:37 +#: ../../source/community/governance.md:40 msgid "" -"Membership in vLLM Ascend is given to individuals on merit basis after they " -"demonstrated strong expertise of the vLLM / vLLM Ascend through " -"contributions, reviews and discussions." +"Membership in vLLM Ascend is given to individuals on a merit basis after " +"they demonstrate their strong expertise in vLLM/vLLM Ascend through " +"contributions, reviews, and discussions." 
msgstr "" "vLLM Ascend 的成员资格是基于个人能力授予的,只有在通过贡献、评审和讨论展示出对 vLLM / vLLM Ascend " "的深厚专业知识后,才可获得。" -#: ../../community/governance.md:39 +#: ../../source/community/governance.md:42 msgid "" -"For membership in the maintainer group the individual has to demonstrate " -"strong and continued alignment with the overall vLLM / vLLM Ascend " +"For membership in the maintainer group, individuals have to demonstrate " +"strong and continued alignment with the overall vLLM/vLLM Ascend " "principles." msgstr "要成为维护者组成员,个人必须表现出与 vLLM / vLLM Ascend 总体原则的高度一致并持续支持。" -#: ../../community/governance.md:41 +#: ../../source/community/governance.md:44 msgid "" -"Light criteria of moving module maintenance to ‘emeritus’ status if they " -"don’t actively participate over long periods of time." -msgstr "如果模块维护人员在长时间内没有积极参与,可根据较宽松的标准将其维护状态转为“荣誉”状态。" +"Maintainers who have been inactive for a long time may be transitioned to" +" **emeritus** status under lenient criteria." +msgstr "长期不活跃的维护者,可根据宽松的标准转为 **荣誉** 状态。" -#: ../../community/governance.md:43 +#: ../../source/community/governance.md:46 msgid "The membership is for an individual, not a company." -msgstr "该会员资格属于个人,而非公司。" +msgstr "该成员资格属于个人,而非公司。" -#: ../../community/governance.md:45 +#: ../../source/community/governance.md:48 msgid "Nomination and Removal" msgstr "提名与罢免" -#: ../../community/governance.md:47 +#: ../../source/community/governance.md:50 msgid "" -"Nomination: Anyone can nominate someone to become a maintainer (include " -"self-nominate). All existing maintainers are responsible for evaluating the " -"nomination. The nominator should provide nominee's info around the strength " -"of the candidate to be a maintainer, include but not limited to review " -"quality, quality contribution, community involvement." -msgstr "" -"提名:任何人都可以提名他人成为维护者(包括自荐)。所有现有维护者都有责任评估提名。提名人应提供被提名人成为维护者的相关优势信息,包括但不限于评审质量、优质贡献、社区参与等。" +"Nomination: Anyone can nominate a candidate to become a maintainer, " +"including self-nominations. All existing maintainers are responsible for " +"reviewing and evaluating each nomination. The nominator should provide " +"relevant information about the nominee's qualifications—such as review " +"quality, quality contribution, and community involvement—among other " +"strengths." +msgstr "提名:任何人都可以提名候选人成为维护者(包括自荐)。所有现有维护者都有责任审查和评估每项提名。提名人应提供被提名人的相关资格信息,例如评审质量、质量贡献和社区参与度等优势。" -#: ../../community/governance.md:48 +#: ../../source/community/governance.md:51 msgid "" -"Removal: Anyone can nominate a person to be removed from maintainer position" -" (include self-nominate). All existing maintainers are responsible for " -"evaluating the nomination. The nominator should provide nominee's info, " -"include but not limited to lack of activity, conflict with the overall " -"direction and other information that makes them unfit to be a maintainer." -msgstr "" -"移除:任何人都可以提名某人被移出维护者职位(包括自荐)。所有现有维护者都有责任评估该提名。提名者应提供被提名人的相关信息,包括但不限于缺乏活动、与整体方向冲突以及使其不适合作为维护者的其他信息。" +"Removal: Anyone may nominate an individual for removal from the " +"maintainer role, including self-nominations. All current maintainers are " +"responsible for reviewing and evaluating such nominations. The nominator " +"should provide relevant information about the nominee—such as prolonged " +"inactivity, misalignment with the project's overall direction, or other " +"factors that may render them unsuitable for the maintainer position." 
+msgstr "移除:任何人都可以提名某人从维护者角色中移除(包括自荐)。所有现任维护者都有责任审查和评估此类提名。提名人应提供被提名人的相关信息,例如长期不活跃、与项目整体方向不一致,或其他可能使其不适合担任维护者职位的因素。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po b/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po index 3d91ba72..842eb34f 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po @@ -4,100 +4,98 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../community/user_stories/index.md:15 +#: ../../source/community/user_stories/index.md:15 msgid "More details" -msgstr "更多细节" +msgstr "更多详情" -#: ../../community/user_stories/index.md:1 +#: ../../source/community/user_stories/index.md:1 msgid "User Stories" -msgstr "用户故事" +msgstr "用户案例" -#: ../../community/user_stories/index.md:3 +#: ../../source/community/user_stories/index.md:3 msgid "" -"Read case studies on how users and developers solves real, everyday problems" -" with vLLM Ascend" -msgstr "阅读案例研究,了解用户和开发者如何使用 vLLM Ascend 解决实际日常问题。" +"Read case studies on how users and developers solve real, everyday " +"problems with vLLM Ascend" +msgstr "阅读案例研究,了解用户和开发者如何利用 vLLM Ascend 解决实际日常问题。" -#: ../../community/user_stories/index.md:5 +#: ../../source/community/user_stories/index.md:5 msgid "" -"[LLaMA-Factory](./llamafactory.md) is an easy-to-use and efficient platform " -"for training and fine-tuning large language models, it supports vLLM Ascend " -"to speed up inference since [LLaMA-" -"Factory#7739](https://github.com/hiyouga/LLaMA-Factory/pull/7739), gain 2x " -"performance enhancement of inference." +"[LLaMA-Factory](./llamafactory.md) is an easy-to-use and efficient " +"platform for training and fine-tuning large language models. It supports " +"vLLM Ascend to speed up inference since [LLaMA-" +"Factory#7739](https://github.com/hiyouga/LLaMA-Factory/pull/7739), " +"gaining 2x performance enhancement in inference." msgstr "" -"[LLaMA-Factory](./llamafactory.md) 是一个易于使用且高效的大语言模型训练与微调平台,自 [LLaMA-" -"Factory#7739](https://github.com/hiyouga/LLaMA-Factory/pull/7739) 起支持 vLLM " -"Ascend 加速推理,推理性能提升 2 倍。" +"[LLaMA-Factory](./llamafactory.md) 是一个易于使用且高效的大语言模型训练与微调平台。自 " +"[LLaMA-Factory#7739](https://github.com/hiyouga/LLaMA-Factory/pull/7739) 起支持 " +"vLLM Ascend 以加速推理,推理性能提升 2 倍。" -#: ../../community/user_stories/index.md:7 +#: ../../source/community/user_stories/index.md:7 msgid "" "[Huggingface/trl](https://github.com/huggingface/trl) is a cutting-edge " "library designed for post-training foundation models using advanced " -"techniques like SFT, PPO and DPO, it uses vLLM Ascend since " +"techniques like SFT, PPO and DPO. It uses vLLM Ascend since " "[v0.17.0](https://github.com/huggingface/trl/releases/tag/v0.17.0) to " -"support RLHF on Ascend NPU." +"support RLHF on Ascend NPUs." 
msgstr "" -"[Huggingface/trl](https://github.com/huggingface/trl) 是一个前沿的库,专为使用 SFT、PPO 和" -" DPO 等先进技术对基础模型进行后训练而设计。从 " -"[v0.17.0](https://github.com/huggingface/trl/releases/tag/v0.17.0) 版本开始,该库利用" -" vLLM Ascend 来支持在 Ascend NPU 上进行 RLHF。" +"[Huggingface/trl](https://github.com/huggingface/trl) 是一个前沿的库,专为使用 SFT、PPO 和 DPO " +"等先进技术对基础模型进行后训练而设计。自 " +"[v0.17.0](https://github.com/huggingface/trl/releases/tag/v0.17.0) 起,该库使用 " +"vLLM Ascend 以支持在昇腾 NPU 上进行 RLHF。" -#: ../../community/user_stories/index.md:9 +#: ../../source/community/user_stories/index.md:9 msgid "" -"[MindIE Turbo](https://pypi.org/project/mindie-turbo) is an LLM inference " -"engine acceleration plug-in library developed by Huawei on Ascend hardware, " -"which includes self-developed large language model optimization algorithms " -"and optimizations related to the inference engine framework. It supports " -"vLLM Ascend since " -"[2.0rc1](https://www.hiascend.com/document/detail/zh/mindie/20RC1/AcceleratePlugin/turbodev/mindie-" -"turbo-0001.html)." +"[MindIE Turbo](https://pypi.org/project/mindie-turbo) is an LLM inference" +" engine acceleration plugin library developed by Huawei on Ascend " +"hardware, which includes self-developed LLM optimization algorithms and " +"optimizations related to the inference engine framework. It supports vLLM" +" Ascend since " +"[2.0rc1](https://www.hiascend.com/document/detail/zh/mindie/20RC1/AcceleratePlugin/turbodev" +"/mindie-turbo-0001.html)." msgstr "" "[MindIE Turbo](https://pypi.org/project/mindie-turbo) " -"是华为在昇腾硬件上开发的一款用于加速LLM推理引擎的插件库,包含自主研发的大语言模型优化算法及与推理引擎框架相关的优化。从 " -"[2.0rc1](https://www.hiascend.com/document/detail/zh/mindie/20RC1/AcceleratePlugin/turbodev/mindie-" -"turbo-0001.html) 起,支持 vLLM Ascend。" +"是华为在昇腾硬件上开发的一款用于加速大语言模型推理引擎的插件库,包含自主研发的大语言模型优化算法及与推理引擎框架相关的优化。自 " +"[2.0rc1](https://www.hiascend.com/document/detail/zh/mindie/20RC1/AcceleratePlugin/turbodev" +"/mindie-turbo-0001.html) 起,支持 vLLM Ascend。" -#: ../../community/user_stories/index.md:11 +#: ../../source/community/user_stories/index.md:11 msgid "" "[GPUStack](https://github.com/gpustack/gpustack) is an open-source GPU " "cluster manager for running AI models. It supports vLLM Ascend since " -"[v0.6.2](https://github.com/gpustack/gpustack/releases/tag/v0.6.2), see more" -" GPUStack performance evaluation info on " -"[link](https://mp.weixin.qq.com/s/pkytJVjcH9_OnffnsFGaew)." +"[v0.6.2](https://github.com/gpustack/gpustack/releases/tag/v0.6.2). See " +"more GPUStack performance evaluation information at [this " +"link](https://mp.weixin.qq.com/s/pkytJVjcH9_OnffnsFGaew)." 
msgstr "" -"[GPUStack](https://github.com/gpustack/gpustack) 是一个开源的 GPU 集群管理器,用于运行 AI " -"模型。从 [v0.6.2](https://github.com/gpustack/gpustack/releases/tag/v0.6.2) " -"版本开始支持 vLLM Ascend,更多 GPUStack 性能评测信息见 " -"[链接](https://mp.weixin.qq.com/s/pkytJVjcH9_OnffnsFGaew)。" +"[GPUStack](https://github.com/gpustack/gpustack) 是一个开源的 GPU 集群管理器,用于运行 AI 模型。自 " +"[v0.6.2](https://github.com/gpustack/gpustack/releases/tag/v0.6.2) 起支持 vLLM " +"Ascend。更多 GPUStack 性能评测信息请参见 " +"[此链接](https://mp.weixin.qq.com/s/pkytJVjcH9_OnffnsFGaew)。" -#: ../../community/user_stories/index.md:13 +#: ../../source/community/user_stories/index.md:13 msgid "" -"[verl](https://github.com/volcengine/verl) is a flexible, efficient and " -"production-ready RL training library for large language models (LLMs), uses " -"vLLM Ascend since " -"[v0.4.0](https://github.com/volcengine/verl/releases/tag/v0.4.0), see more " -"info on [verl x Ascend " -"Quickstart](https://verl.readthedocs.io/en/latest/ascend_tutorial/ascend_quick_start.html)." +"[verl](https://github.com/volcengine/verl) is a flexible, efficient, and " +"production-ready RL training library for LLMs. It uses vLLM Ascend since " +"[v0.4.0](https://github.com/volcengine/verl/releases/tag/v0.4.0). See " +"more information on [verl x Ascend " +"Quickstart](https://verl.readthedocs.io/en/latest/ascend_tutorial/quick_start/ascend_quick_start.html)." msgstr "" "[verl](https://github.com/volcengine/verl) " -"是一个灵活、高效且可用于生产环境的大型语言模型(LLM)强化学习训练库,自 " -"[v0.4.0](https://github.com/volcengine/verl/releases/tag/v0.4.0) 起支持 vLLM " -"Ascend,更多信息请参见 [verl x Ascend " -"快速上手](https://verl.readthedocs.io/en/latest/ascend_tutorial/ascend_quick_start.html)。" +"是一个灵活、高效且可用于生产环境的大语言模型强化学习训练库。自 " +"[v0.4.0](https://github.com/volcengine/verl/releases/tag/v0.4.0) 起,该库使用 " +"vLLM Ascend。更多信息请参见 [verl x Ascend " +"快速入门](https://verl.readthedocs.io/en/latest/ascend_tutorial/quick_start/ascend_quick_start.html)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po b/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po index 4e8c7718..f0b3444d 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po @@ -4,84 +4,76 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../community/user_stories/llamafactory.md:1 +#: ../../source/community/user_stories/llamafactory.md:1 msgid "LLaMA-Factory" msgstr "LLaMA-Factory" -#: ../../community/user_stories/llamafactory.md:3 -msgid "**About / Introduction**" -msgstr "**关于 / 介绍**" +#: ../../source/community/user_stories/llamafactory.md:3 +msgid "**Introduction**" +msgstr "**简介**" -#: ../../community/user_stories/llamafactory.md:5 +#: ../../source/community/user_stories/llamafactory.md:5 msgid "" -"[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory) is an easy-to-use " -"and efficient platform for training and fine-tuning large language models. " -"With LLaMA-Factory, you can fine-tune hundreds of pre-trained models locally" -" without writing any code." +"[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory) is an easy-to-" +"use and efficient platform for training and fine-tuning large language " +"models. With LLaMA-Factory, you can fine-tune hundreds of pre-trained " +"models locally without writing any code." msgstr "" -"[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory) " -"是一个易于使用且高效的平台,用于训练和微调大型语言模型。有了 LLaMA-Factory,你可以在本地对数百个预训练模型进行微调,无需编写任何代码。" +"[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory) 是一个易于使用且高效的平台,用于训练和微调大型语言模型。通过 LLaMA-Factory,您可以在本地对数百个预训练模型进行微调,无需编写任何代码。" -#: ../../community/user_stories/llamafactory.md:7 +#: ../../source/community/user_stories/llamafactory.md:7 msgid "" -"LLaMA-Facotory users need to evaluate and inference the model after fine-" -"tuning the model." -msgstr "LLaMA-Facotory 用户需要在对模型进行微调后对模型进行评估和推理。" +"LLaMA-Factory users need to evaluate the model and perform inference " +"after fine-tuning." +msgstr "LLaMA-Factory 用户在完成微调后,需要对模型进行评估和推理。" -#: ../../community/user_stories/llamafactory.md:9 -msgid "**The Business Challenge**" +#: ../../source/community/user_stories/llamafactory.md:9 +msgid "**Business challenge**" msgstr "**业务挑战**" -#: ../../community/user_stories/llamafactory.md:11 +#: ../../source/community/user_stories/llamafactory.md:11 msgid "" -"LLaMA-Factory used transformers to perform inference on Ascend NPU, but the " -"speed was slow." -msgstr "LLaMA-Factory 使用 transformers 在 Ascend NPU 上进行推理,但速度较慢。" +"LLaMA-Factory uses Transformers to perform inference on Ascend NPUs, but " +"the speed is slow." 
+msgstr "LLaMA-Factory 使用 Transformers 在昇腾 NPU 上进行推理,但速度较慢。" -#: ../../community/user_stories/llamafactory.md:13 -msgid "**Solving Challenges and Benefits with vLLM Ascend**" -msgstr "**通过 vLLM Ascend 解决挑战与收益**" +#: ../../source/community/user_stories/llamafactory.md:13 +msgid "**Benefits with vLLM Ascend**" +msgstr "**vLLM Ascend 带来的优势**" -#: ../../community/user_stories/llamafactory.md:15 +#: ../../source/community/user_stories/llamafactory.md:15 msgid "" "With the joint efforts of LLaMA-Factory and vLLM Ascend ([LLaMA-" -"Factory#7739](https://github.com/hiyouga/LLaMA-Factory/pull/7739)), the " -"performance of LLaMA-Factory in the model inference stage has been " -"significantly improved. According to the test results, the inference speed " -"of LLaMA-Factory has been increased to 2x compared to the transformers " -"version." +"Factory#7739](https://github.com/hiyouga/LLaMA-Factory/pull/7739)), " +"LLaMA-Factory has achieved significant performance gains during model " +"inference. Benchmark results show that its inference speed is now up to " +"2× faster compared to the Transformers implementation." msgstr "" -"在 LLaMA-Factory 和 vLLM Ascend 的共同努力下(参见 [LLaMA-" -"Factory#7739](https://github.com/hiyouga/LLaMA-Factory/pull/7739)),LLaMA-" -"Factory 在模型推理阶段的性能得到了显著提升。根据测试结果,LLaMA-Factory 的推理速度相比 transformers 版本提升到了 2" -" 倍。" +"通过 LLaMA-Factory 与 vLLM Ascend 的共同努力([LLaMA-Factory#7739](https://github.com/hiyouga/LLaMA-Factory/pull/7739)),LLaMA-Factory 在模型推理阶段实现了显著的性能提升。基准测试结果表明,其推理速度相比 Transformers 实现最高提升了 2 倍。" -#: ../../community/user_stories/llamafactory.md:17 +#: ../../source/community/user_stories/llamafactory.md:17 msgid "**Learn more**" msgstr "**了解更多**" -#: ../../community/user_stories/llamafactory.md:19 +#: ../../source/community/user_stories/llamafactory.md:19 msgid "" -"See more about LLaMA-Factory and how it uses vLLM Ascend for inference on " -"the Ascend NPU in the following documentation: [LLaMA-Factory Ascend NPU " +"See more details about LLaMA-Factory and how it uses vLLM Ascend for " +"inference on Ascend NPUs in [LLaMA-Factory Ascend NPU " "Inference](https://llamafactory.readthedocs.io/en/latest/advanced/npu_inference.html)." msgstr "" -"在以下文档中查看更多关于 LLaMA-Factory 以及其如何在 Ascend NPU 上使用 vLLM Ascend 进行推理的信息:[LLaMA-" -"Factory Ascend NPU " -"推理](https://llamafactory.readthedocs.io/en/latest/advanced/npu_inference.html)。" +"有关 LLaMA-Factory 的更多详情以及它如何在昇腾 NPU 上使用 vLLM Ascend 进行推理,请参阅 [LLaMA-Factory 昇腾 NPU 推理](https://llamafactory.readthedocs.io/en/latest/advanced/npu_inference.html)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po b/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po index 62b2a480..6f939d5f 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po @@ -4,621 +4,1077 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../community/versioning_policy.md:1 -msgid "Versioning policy" +#: ../../source/community/versioning_policy.md:1 +msgid "Versioning Policy" msgstr "版本管理策略" -#: ../../community/versioning_policy.md:3 +#: ../../source/community/versioning_policy.md:3 msgid "" "Starting with vLLM 0.7.x, the vLLM Ascend Plugin ([vllm-project/vllm-" -"ascend](https://github.com/vllm-project/vllm-ascend)) project follows the " -"[PEP 440](https://peps.python.org/pep-0440/) to publish matching with vLLM " -"([vllm-project/vllm](https://github.com/vllm-project/vllm))." +"ascend](https://github.com/vllm-project/vllm-ascend)) project follows " +"[PEP 440](https://peps.python.org/pep-0440/) to publish versions matching" +" vLLM ([vllm-project/vllm](https://github.com/vllm-project/vllm))." msgstr "" "从 vLLM 0.7.x 开始,vLLM Ascend 插件([vllm-project/vllm-" "ascend](https://github.com/vllm-project/vllm-ascend))项目遵循 [PEP " -"440](https://peps.python.org/pep-0440/) ,以与 vLLM([vllm-" -"project/vllm](https://github.com/vllm-project/vllm))版本匹配发布。" +"440](https://peps.python.org/pep-0440/) 规范发布版本,以匹配 vLLM([vllm-" +"project/vllm](https://github.com/vllm-project/vllm))的版本。" -#: ../../community/versioning_policy.md:5 +#: ../../source/community/versioning_policy.md:5 msgid "vLLM Ascend Plugin versions" msgstr "vLLM Ascend 插件版本" -#: ../../community/versioning_policy.md:7 +#: ../../source/community/versioning_policy.md:7 msgid "" -"Each vLLM Ascend release will be versioned: " +"Each vLLM Ascend release is versioned as " "`v[major].[minor].[micro][rcN][.postN]` (such as `v0.7.3rc1`, `v0.7.3`, " "`v0.7.3.post1`)" msgstr "" -"每个 vLLM Ascend 版本将采用以下版本格式:`v[major].[minor].[micro][rcN][.postN]`(例如 " +"每个 vLLM Ascend 版本采用 `v[major].[minor].[micro][rcN][.postN]` 格式(例如 " "`v0.7.3rc1`、`v0.7.3`、`v0.7.3.post1`)" -#: ../../community/versioning_policy.md:10 +#: ../../source/community/versioning_policy.md:10 msgid "" -"**Final releases**: will typically be released every **3 months**, will take" -" the vLLM upstream release plan and Ascend software product release plan " -"into comprehensive consideration." -msgstr "**正式版本**:通常每**3个月**发布一次,将综合考虑 vLLM 上游发行计划和昇腾软件产品发行计划。" +"**Final releases**: Typically scheduled every three months, with careful " +"alignment to the vLLM upstream release cycle and the Ascend software " +"product roadmap." +msgstr "**正式版本**:通常每三个月计划发布一次,会仔细对齐 vLLM 上游发布周期和昇腾软件产品路线图。" -#: ../../community/versioning_policy.md:11 +#: ../../source/community/versioning_policy.md:11 msgid "" -"**Pre releases**: will typically be released **on demand**, ending with rcN," -" represents the Nth release candidate version, to support early testing by " -"our users prior to a final release." -msgstr "**预发布版本**:通常会**按需发布**,以 rcN 结尾,表示第N个候选发布版本,旨在支持用户在正式发布前进行早期测试。" +"**Pre releases**: Typically issued **on demand**, labeled with rcN to " +"indicate the Nth release candidate. 
They are intended to support early " +"testing by users ahead of the final release." +msgstr "**预发布版本**:通常**按需发布**,以 rcN 标记,表示第 N 个候选发布版本,旨在支持用户在正式发布前进行早期测试。" -#: ../../community/versioning_policy.md:12 +#: ../../source/community/versioning_policy.md:12 msgid "" -"**Post releases**: will typically be released **on demand** to support to " -"address minor errors in a final release. It's different from [PEP-440 post " -"release note](https://peps.python.org/pep-0440/#post-releases) suggestion, " -"it will contain actual bug fixes considering that the final release version " -"should be matched strictly with the vLLM final release version " -"(`v[major].[minor].[micro]`). The post version has to be published as a " +"**Post releases**: Typically issued **on demand** to address minor errors" +" in a final release. Different from [PEP-440 post release " +"note](https://peps.python.org/pep-0440/#post-releases) convention, these " +"versions include actual bug fixes, as the final release version must " +"strictly align with the vLLM final release format " +"(`v[major].[minor].[micro]`). Any post version must be published as a " "patch version of the final release." msgstr "" -"**后续版本**:通常会根据需要发布,以支持解决正式发布中的小错误。这与 [PEP-440 " -"的后续版本说明](https://peps.python.org/pep-0440/#post-releases) 建议不同,它将包含实际的 bug " -"修复,因为最终发布版本应严格与 vLLM " -"的最终发布版本(`v[major].[minor].[micro]`)匹配。后续版本必须以正式发布的补丁版本形式发布。" +"**后续版本**:通常**按需发布**,用于解决正式版本中的小错误。与 [PEP-440 后续版本说明](https://peps.python.org/pep-" +"0440/#post-releases) 的惯例不同,这些版本包含实际的错误修复,因为正式发布版本必须严格与 vLLM 的正式发布格式 " +"(`v[major].[minor].[micro]`) 对齐。任何后续版本都必须作为正式版本的补丁版本发布。" -#: ../../community/versioning_policy.md:14 +#: ../../source/community/versioning_policy.md:14 msgid "For example:" msgstr "例如:" -#: ../../community/versioning_policy.md:15 -msgid "" -"`v0.7.x`: it's the first final release to match the vLLM `v0.7.x` version." -msgstr "`v0.7.x`:这是第一个与 vLLM `v0.7.x` 版本相匹配的正式发布版本。" +#: ../../source/community/versioning_policy.md:16 +msgid "`v0.7.x`: first final release to match the vLLM `v0.7.x` version." +msgstr "`v0.7.x`:首个与 vLLM `v0.7.x` 版本匹配的正式发布版本。" -#: ../../community/versioning_policy.md:16 -msgid "`v0.7.3rc1`: will be the first pre version of vLLM Ascend." -msgstr "`v0.7.3rc1`:将会是 vLLM Ascend 的第一个预发布版本。" +#: ../../source/community/versioning_policy.md:17 +msgid "`v0.7.3rc1`: first pre version of vLLM Ascend." +msgstr "`v0.7.3rc1`:vLLM Ascend 的首个预发布版本。" -#: ../../community/versioning_policy.md:17 +#: ../../source/community/versioning_policy.md:18 msgid "" -"`v0.7.3.post1`: will be the post release if the `v0.7.3` release has some " +"`v0.7.3.post1`: post release for the `v0.7.3` release if it has some " "minor errors." -msgstr "`v0.7.3.post1`:如果 `v0.7.3` 版本发布有一些小错误,将作为后续修正版发布。" +msgstr "`v0.7.3.post1`:如果 `v0.7.3` 版本存在一些小错误,将作为其后续版本发布。" -#: ../../community/versioning_policy.md:19 -msgid "Release Compatibility Matrix" +#: ../../source/community/versioning_policy.md:20 +msgid "Release compatibility matrix" msgstr "版本兼容性矩阵" -#: ../../community/versioning_policy.md:21 -msgid "Following is the Release Compatibility Matrix for vLLM Ascend Plugin:" -msgstr "以下是 vLLM Ascend 插件的版本兼容性矩阵:" +#: ../../source/community/versioning_policy.md:22 +msgid "" +"The table below is the release compatibility matrix for vLLM Ascend " +"release." 
+msgstr "下表是 vLLM Ascend 发布的版本兼容性矩阵。" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "vLLM Ascend" msgstr "vLLM Ascend" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "vLLM" msgstr "vLLM" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "Python" msgstr "Python" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "Stable CANN" msgstr "Stable CANN" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "PyTorch/torch_npu" msgstr "PyTorch/torch_npu" -#: ../../community/versioning_policy.md -msgid "MindIE Turbo" -msgstr "MindIE Turbo" +#: ../../source/community/versioning_policy.md +msgid "Triton Ascend" +msgstr "Triton Ascend" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md +msgid "v0.17.0rc1" +msgstr "v0.17.0rc1" + +#: ../../source/community/versioning_policy.md +msgid "v0.17.0" +msgstr "v0.17.0" + +#: ../../source/community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 +msgid ">= 3.10, < 3.12" +msgstr ">= 3.10, < 3.12" + +#: ../../source/community/versioning_policy.md +msgid "8.5.1" +msgstr "8.5.1" + +#: ../../source/community/versioning_policy.md +msgid "2.9.0 / 2.9.0" +msgstr "2.9.0 / 2.9.0" + +#: ../../source/community/versioning_policy.md +msgid "3.2.0" +msgstr "3.2.0" + +#: ../../source/community/versioning_policy.md +msgid "v0.16.0rc1" +msgstr "v0.16.0rc1" + +#: ../../source/community/versioning_policy.md +msgid "v0.16.0" +msgstr "v0.16.0" + +#: ../../source/community/versioning_policy.md +msgid "v0.15.0rc1" +msgstr "v0.15.0rc1" + +#: ../../source/community/versioning_policy.md +msgid "v0.15.0" +msgstr "v0.15.0" + +#: ../../source/community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 +msgid "8.5.0" +msgstr "8.5.0" + +#: ../../source/community/versioning_policy.md +msgid "v0.14.0rc1" +msgstr "v0.14.0rc1" + +#: ../../source/community/versioning_policy.md +msgid "v0.14.1" +msgstr "v0.14.1" + +#: ../../source/community/versioning_policy.md +msgid "v0.13.0" +msgstr "v0.13.0" + +#: ../../source/community/versioning_policy.md +msgid "2.9.0 / 2.8.0.post2" +msgstr "2.9.0 / 2.8.0.post2" + +#: ../../source/community/versioning_policy.md +msgid "v0.13.0rc2" +msgstr "v0.13.0rc2" + +#: ../../source/community/versioning_policy.md +msgid "2.8.0 / 2.8.0.post1" +msgstr "2.8.0 / 2.8.0.post1" + +#: ../../source/community/versioning_policy.md +msgid "v0.13.0rc1" +msgstr "v0.13.0rc1" + +#: ../../source/community/versioning_policy.md +msgid "8.3.RC2" +msgstr "8.3.RC2" + +#: ../../source/community/versioning_policy.md +msgid "2.8.0 / 2.8.0" +msgstr "2.8.0 / 2.8.0" + +#: ../../source/community/versioning_policy.md +msgid "v0.12.0rc1" +msgstr "v0.12.0rc1" + +#: ../../source/community/versioning_policy.md +msgid "v0.12.0" +msgstr "v0.12.0" + +#: ../../source/community/versioning_policy.md +msgid "v0.11.0" +msgstr "v0.11.0" + +#: ../../source/community/versioning_policy.md +msgid ">= 3.9, < 3.12" +msgstr ">= 3.9, < 3.12" + +#: ../../source/community/versioning_policy.md +msgid "2.7.1 / 2.7.1.post1" +msgstr "2.7.1 / 2.7.1.post1" + +#: 
../../source/community/versioning_policy.md +msgid "v0.11.0rc3" +msgstr "v0.11.0rc3" + +#: ../../source/community/versioning_policy.md +msgid "v0.11.0rc2" +msgstr "v0.11.0rc2" + +#: ../../source/community/versioning_policy.md +msgid "2.7.1 / 2.7.1" +msgstr "2.7.1 / 2.7.1" + +#: ../../source/community/versioning_policy.md +msgid "v0.11.0rc1" +msgstr "v0.11.0rc1" + +#: ../../source/community/versioning_policy.md +msgid "8.3.RC1" +msgstr "8.3.RC1" + +#: ../../source/community/versioning_policy.md +msgid "v0.11.0rc0" +msgstr "v0.11.0rc0" + +#: ../../source/community/versioning_policy.md +msgid "8.2.RC1" +msgstr "8.2.RC1" + +#: ../../source/community/versioning_policy.md +msgid "2.7.1 / 2.7.1.dev20250724" +msgstr "2.7.1 / 2.7.1.dev20250724" + +#: ../../source/community/versioning_policy.md +msgid "v0.10.2rc1" +msgstr "v0.10.2rc1" + +#: ../../source/community/versioning_policy.md +msgid "v0.10.2" +msgstr "v0.10.2" + +#: ../../source/community/versioning_policy.md +msgid "v0.10.1rc1" +msgstr "v0.10.1rc1" + +#: ../../source/community/versioning_policy.md +msgid "v0.10.1/v0.10.1.1" +msgstr "v0.10.1/v0.10.1.1" + +#: ../../source/community/versioning_policy.md +msgid "v0.10.0rc1" +msgstr "v0.10.0rc1" + +#: ../../source/community/versioning_policy.md +msgid "v0.10.0" +msgstr "v0.10.0" + +#: ../../source/community/versioning_policy.md msgid "v0.9.2rc1" msgstr "v0.9.2rc1" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.9.2" msgstr "v0.9.2" -#: ../../community/versioning_policy.md -msgid ">= 3.9, < 3.12" -msgstr ">= 3.9,< 3.12" - -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "8.1.RC1" msgstr "8.1.RC1" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "2.5.1 / 2.5.1.post1.dev20250619" msgstr "2.5.1 / 2.5.1.post1.dev20250619" -#: ../../community/versioning_policy.md -msgid "v0.9.1rc1" -msgstr "v0.9.1rc1" - -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.9.1" msgstr "v0.9.1" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md +msgid "2.5.1 / 2.5.1.post1" +msgstr "2.5.1 / 2.5.1.post1" + +#: ../../source/community/versioning_policy.md +msgid "v0.9.1rc3" +msgstr "v0.9.1rc3" + +#: ../../source/community/versioning_policy.md +msgid "v0.9.1rc2" +msgstr "v0.9.1rc2" + +#: ../../source/community/versioning_policy.md +msgid "v0.9.1rc1" +msgstr "v0.9.1rc1" + +#: ../../source/community/versioning_policy.md msgid "2.5.1 / 2.5.1.post1.dev20250528" msgstr "2.5.1 / 2.5.1.post1.dev20250528" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.9.0rc2" msgstr "v0.9.0rc2" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.9.0" msgstr "v0.9.0" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "2.5.1 / 2.5.1" msgstr "2.5.1 / 2.5.1" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.9.0rc1" msgstr "v0.9.0rc1" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.8.5rc1" msgstr "v0.8.5rc1" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.8.5.post1" msgstr "v0.8.5.post1" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.8.4rc2" msgstr "v0.8.4rc2" -#: 
../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.8.4" msgstr "v0.8.4" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "8.0.0" msgstr "8.0.0" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.7.3.post1" msgstr "v0.7.3.post1" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md msgid "v0.7.3" msgstr "v0.7.3" -#: ../../community/versioning_policy.md -msgid "2.0rc1" -msgstr "2.0候选版本1" - -#: ../../community/versioning_policy.md:34 -msgid "Release cadence" -msgstr "发布节奏" - -#: ../../community/versioning_policy.md:36 -msgid "release window" -msgstr "发布窗口" - -#: ../../community/versioning_policy.md -msgid "Date" -msgstr "日期" - -#: ../../community/versioning_policy.md -msgid "Event" -msgstr "事件" - -#: ../../community/versioning_policy.md -msgid "2025.07.11" -msgstr "2025.07.11" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.9.2rc1" -msgstr "候选发布版本,v0.9.2rc1" - -#: ../../community/versioning_policy.md -msgid "2025.06.22" -msgstr "2025.06.22" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.9.1rc1" -msgstr "候选发布版本,v0.9.1rc1" - -#: ../../community/versioning_policy.md -msgid "2025.06.10" -msgstr "2025.06.10" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.9.0rc2" -msgstr "候选发布版本,v0.9.0rc2" - -#: ../../community/versioning_policy.md -msgid "2025.06.09" -msgstr "2025.06.09" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.9.0rc1" -msgstr "候选发布版本本,v0.9.0rc1" - -#: ../../community/versioning_policy.md -msgid "2025.05.29" -msgstr "2025.05.29" - -#: ../../community/versioning_policy.md -msgid "v0.7.x post release, v0.7.3.post1" -msgstr "v0.7.x 补丁版,v0.7.3.post1" - -#: ../../community/versioning_policy.md -msgid "2025.05.08" -msgstr "2025.05.08" - -#: ../../community/versioning_policy.md -msgid "v0.7.x Final release, v0.7.3" -msgstr "v0.7.x 正式版,v0.7.3" - -#: ../../community/versioning_policy.md -msgid "2025.05.06" -msgstr "2025.05.06" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.8.5rc1" -msgstr "候选发布版本,v0.8.5rc1" - -#: ../../community/versioning_policy.md -msgid "2025.04.28" -msgstr "2025.04.28" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.8.4rc2" -msgstr "候选发布版本,v0.8.4rc2" - -#: ../../community/versioning_policy.md -msgid "2025.04.18" -msgstr "2025.04.18" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.8.4rc1" -msgstr "候选发布版本,v0.8.4rc1" - -#: ../../community/versioning_policy.md -msgid "2025.03.28" -msgstr "2025.03.28" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.7.3rc2" -msgstr "候选发布版本,v0.7.3rc2" - -#: ../../community/versioning_policy.md -msgid "2025.03.14" -msgstr "2025.03.14" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.7.3rc1" -msgstr "候选发布版本,v0.7.3rc1" - -#: ../../community/versioning_policy.md -msgid "2025.02.19" -msgstr "2025.02.19" - -#: ../../community/versioning_policy.md -msgid "Release candidates, v0.7.1rc1" -msgstr "候选发布版本,v0.7.1rc1" - -#: ../../community/versioning_policy.md:53 -msgid "Branch policy" -msgstr "分支策略" - -#: ../../community/versioning_policy.md:55 -msgid "vLLM Ascend has main branch and dev branch." 
-msgstr "vLLM Ascend 有主分支和开发分支。" - -#: ../../community/versioning_policy.md:57 +#: ../../source/community/versioning_policy.md:55 msgid "" -"**main**: main branch,corresponds to the vLLM main branch and latest 1 or 2 " -"release version. It is continuously monitored for quality through Ascend CI." -msgstr "**main**:main 分支,对应 vLLM 的主分支和最新的 1 或 2 个发布版本。该分支通过 Ascend CI 持续监控质量。" +"If you're using v0.7.3, don't forget to install [mindie-" +"turbo](https://pypi.org/project/mindie-turbo) as well." +msgstr "如果您正在使用 v0.7.3,请别忘了同时安装 [mindie-turbo](https://pypi.org/project/mindie-turbo)。" -#: ../../community/versioning_policy.md:58 +#: ../../source/community/versioning_policy.md:58 msgid "" -"**vX.Y.Z-dev**: development branch, created with part of new releases of " -"vLLM. For example, `v0.7.3-dev` is the dev branch for vLLM `v0.7.3` version." -msgstr "" -"**vX.Y.Z-dev**:开发分支,是随着 vLLM 新版本的一部分一起创建的。例如,`v0.7.3-dev` 是 vLLM `v0.7.3` " -"版本的开发分支。" +"For main branch of vLLM Ascend, we usually make it compatible with the " +"latest vLLM release and a newer commit hash of vLLM. Please note that " +"this table is usually updated. Please check it regularly." +msgstr "对于 vLLM Ascend 的 main 分支,我们通常会使其与最新的 vLLM 发布版本以及更新的 vLLM 提交哈希兼容。请注意,此表格会经常更新,请定期查看。" -#: ../../community/versioning_policy.md:60 -msgid "" -"Usually, a commit should be ONLY first merged in the main branch, and then " -"backported to the dev branch to reduce maintenance costs as much as " -"possible." -msgstr "通常,提交应该只先合并到主分支,然后再回溯合并到开发分支,以尽可能降低维护成本。" - -#: ../../community/versioning_policy.md:62 -msgid "Maintenance branch and EOL:" -msgstr "维护分支与生命周期结束(EOL):" - -#: ../../community/versioning_policy.md:63 -msgid "The branch status will be in one of the following states:" -msgstr "分支状态将处于以下几种状态之一:" - -#: ../../community/versioning_policy.md -msgid "Branch" -msgstr "分支" - -#: ../../community/versioning_policy.md -msgid "Time frame" -msgstr "时间范围" - -#: ../../community/versioning_policy.md -msgid "Summary" -msgstr "摘要" - -#: ../../community/versioning_policy.md -msgid "Maintained" -msgstr "维护中" - -#: ../../community/versioning_policy.md -msgid "Approximately 2-3 minor versions" -msgstr "大约 2-3 个小版本" - -#: ../../community/versioning_policy.md -msgid "All bugfixes are appropriate. Releases produced, CI commitment." -msgstr "所有的错误修复都是合适的。正常发布版本,持续集成承诺。" - -#: ../../community/versioning_policy.md -msgid "Unmaintained" -msgstr "无人维护" - -#: ../../community/versioning_policy.md -msgid "Community interest driven" -msgstr "社区兴趣驱动" - -#: ../../community/versioning_policy.md -msgid "All bugfixes are appropriate. No Releases produced, No CI commitment" -msgstr "所有的 bug 修复都是合适的。没有发布版本,不承诺持续集成(CI)。" - -#: ../../community/versioning_policy.md -msgid "End of Life (EOL)" -msgstr "生命周期结束(EOL)" - -#: ../../community/versioning_policy.md -msgid "N/A" -msgstr "不适用" - -#: ../../community/versioning_policy.md -msgid "Branch no longer accepting changes" -msgstr "该分支不再接受更改" - -#: ../../community/versioning_policy.md:71 -msgid "Branch state" -msgstr "分支状态" - -#: ../../community/versioning_policy.md:73 -msgid "" -"Note that vLLM Ascend will only be released for a certain vLLM release " -"version rather than all versions. Hence, You might see only part of versions" -" have dev branches (such as only `0.7.1-dev` / `0.7.3-dev` but no " -"`0.7.2-dev`), this is as expected." 
-msgstr "" -"请注意,vLLM Ascend 只会针对某些 vLLM 发布版本发布,而不是所有版本。因此,您可能会看到只有部分版本拥有开发分支(例如只有 " -"`0.7.1-dev` / `0.7.3-dev`,而没有 `0.7.2-dev`),这是正常现象。" - -#: ../../community/versioning_policy.md:75 -msgid "" -"Usually, each minor version of vLLM (such as 0.7) will correspond to a vLLM " -"Ascend version branch and support its latest version (for example, we plan " -"to support version 0.7.3) as following shown:" -msgstr "" -"通常,vLLM 的每一个小版本(例如 0.7)都会对应一个 vLLM Ascend 版本分支,并支持其最新版本(例如,我们计划支持 0.7.3 " -"版),如下所示:" - -#: ../../community/versioning_policy.md -msgid "Status" -msgstr "状态" - -#: ../../community/versioning_policy.md -msgid "Note" -msgstr "注释" - -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "main" msgstr "main" -#: ../../community/versioning_policy.md -msgid "CI commitment for vLLM main branch and vLLM 0.9.2 branch" -msgstr "vLLM 主分支和 vLLM 0.9.2 分支的 CI 承诺" +#: ../../source/community/versioning_policy.md:54 +msgid "v0.18.0 tag" +msgstr "v0.18.0 标签" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 +msgid "2.9.0 / 2.9.0" +msgstr "2.9.0 / 2.9.0" + +#: ../../source/community/versioning_policy.md:64 +msgid "Release cadence" +msgstr "发布节奏" + +#: ../../source/community/versioning_policy.md:66 +msgid "Release window" +msgstr "发布窗口" + +#: ../../source/community/versioning_policy.md:54 +msgid "Date" +msgstr "日期" + +#: ../../source/community/versioning_policy.md:54 +msgid "Event" +msgstr "事件" + +#: ../../source/community/versioning_policy.md:54 +msgid "2026.03.15" +msgstr "2026.03.15" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.17.0rc1" +msgstr "候选发布版本,v0.17.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2026.03.10" +msgstr "2026.03.10" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.16.0rc1" +msgstr "候选发布版本,v0.16.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2026.02.27" +msgstr "2026.02.27" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.15.0rc1" +msgstr "候选发布版本,v0.15.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2026.02.06" +msgstr "2026.02.06" + +#: ../../source/community/versioning_policy.md:54 +msgid "v0.13.0 Final release, v0.13.0" +msgstr "v0.13.0 正式版,v0.13.0" + +#: ../../source/community/versioning_policy.md:54 +msgid "2026.01.26" +msgstr "2026.01.26" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.14.0rc1" +msgstr "候选发布版本,v0.14.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2026.01.24" +msgstr "2026.01.24" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.13.0rc2" +msgstr "候选发布版本,v0.13.0rc2" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.12.27" +msgstr "2025.12.27" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.13.0rc1" +msgstr "候选发布版本,v0.13.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.12.16" +msgstr "2025.12.16" + +#: ../../source/community/versioning_policy.md:54 +msgid "v0.11.0 Final release, v0.11.0" +msgstr "v0.11.0 正式版,v0.11.0" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.12.13" +msgstr "2025.12.13" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.12.0rc1" +msgstr "候选发布版本,v0.12.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.12.03" +msgstr "2025.12.03" + +#: 
../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.11.0rc3" +msgstr "候选发布版本,v0.11.0rc3" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.11.21" +msgstr "2025.11.21" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.11.0rc2" +msgstr "候选发布版本,v0.11.0rc2" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.11.10" +msgstr "2025.11.10" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.11.0rc1" +msgstr "候选发布版本,v0.11.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.09.30" +msgstr "2025.09.30" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.11.0rc0" +msgstr "候选发布版本,v0.11.0rc0" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.09.16" +msgstr "2025.09.16" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.10.2rc1" +msgstr "候选发布版本,v0.10.2rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.09.04" +msgstr "2025.09.04" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.10.1rc1" +msgstr "候选发布版本,v0.10.1rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.09.03" +msgstr "2025.09.03" + +#: ../../source/community/versioning_policy.md:54 +msgid "v0.9.1 Final release, v0.9.1" +msgstr "v0.9.1 正式版,v0.9.1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.08.22" +msgstr "2025.08.22" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.9.1rc3" +msgstr "候选发布版本,v0.9.1rc3" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.08.07" +msgstr "2025.08.07" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.10.0rc1" +msgstr "候选发布版本,v0.10.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.08.04" +msgstr "2025.08.04" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.9.1rc2" +msgstr "候选发布版本,v0.9.1rc2" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.07.11" +msgstr "2025.07.11" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.9.2rc1" +msgstr "候选发布版本,v0.9.2rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.06.22" +msgstr "2025.06.22" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.9.1rc1" +msgstr "候选发布版本,v0.9.1rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.06.10" +msgstr "2025.06.10" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.9.0rc2" +msgstr "候选发布版本,v0.9.0rc2" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.06.09" +msgstr "2025.06.09" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.9.0rc1" +msgstr "候选发布版本,v0.9.0rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.05.29" +msgstr "2025.05.29" + +#: ../../source/community/versioning_policy.md:54 +msgid "v0.7.3 post release, v0.7.3.post1" +msgstr "v0.7.3 补丁版,v0.7.3.post1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.05.08" +msgstr "2025.05.08" + +#: ../../source/community/versioning_policy.md:54 +msgid "v0.7.3 Final release, v0.7.3" +msgstr "v0.7.3 正式版,v0.7.3" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.05.06" +msgstr "2025.05.06" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.8.5rc1" +msgstr "候选发布版本,v0.8.5rc1" + +#: 
../../source/community/versioning_policy.md:54 +msgid "2025.04.28" +msgstr "2025.04.28" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.8.4rc2" +msgstr "候选发布版本,v0.8.4rc2" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.04.18" +msgstr "2025.04.18" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.8.4rc1" +msgstr "候选发布版本,v0.8.4rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.03.28" +msgstr "2025.03.28" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.7.3rc2" +msgstr "候选发布版本,v0.7.3rc2" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.03.14" +msgstr "2025.03.14" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.7.3rc1" +msgstr "候选发布版本,v0.7.3rc1" + +#: ../../source/community/versioning_policy.md:54 +msgid "2025.02.19" +msgstr "2025.02.19" + +#: ../../source/community/versioning_policy.md:54 +msgid "Release candidates, v0.7.1rc1" +msgstr "候选发布版本,v0.7.1rc1" + +#: ../../source/community/versioning_policy.md:102 +msgid "Branch policy" +msgstr "分支策略" + +#: ../../source/community/versioning_policy.md:104 +msgid "vLLM Ascend includes two branches: main and dev." +msgstr "vLLM Ascend 包含两个分支:main 和 dev。" + +#: ../../source/community/versioning_policy.md:106 +msgid "" +"**main**: corresponds to the vLLM main branch and latest 1 or 2 release " +"version. It is continuously monitored for quality through Ascend CI." +msgstr "**main**:对应 vLLM 的主分支和最新的 1 或 2 个发布版本。该分支通过 Ascend CI 持续进行质量监控。" + +#: ../../source/community/versioning_policy.md:107 +msgid "" +"**releases/vX.Y.Z**: development branch, created with part of new " +"releases of vLLM. For example, `releases/v0.13.0` is the dev branch for " +"vLLM `v0.13.0` version." +msgstr "**releases/vX.Y.Z**:开发分支,随 vLLM 新版本的一部分创建。例如,`releases/v0.13.0` 是 vLLM `v0.13.0` 版本的开发分支。" + +#: ../../source/community/versioning_policy.md:109 +msgid "" +"Commits should typically be merged into the main branch first, and only " +"then backported to the dev branch, to reduce maintenance costs as much as" +" possible." +msgstr "通常,提交应首先合并到主分支,然后再反向移植到开发分支,以尽可能降低维护成本。" + +#: ../../source/community/versioning_policy.md:111 +msgid "Maintenance branch and EOL" +msgstr "维护分支与生命周期结束(EOL)" + +#: ../../source/community/versioning_policy.md:113 +msgid "The table below lists branch states." 
+msgstr "下表列出了分支状态。" + +#: ../../source/community/versioning_policy.md:54 +msgid "Branch" +msgstr "分支" + +#: ../../source/community/versioning_policy.md:54 +msgid "Time Frame" +msgstr "时间范围" + +#: ../../source/community/versioning_policy.md:54 +msgid "Summary" +msgstr "摘要" + +#: ../../source/community/versioning_policy.md:54 +msgid "Maintained" +msgstr "维护中" + +#: ../../source/community/versioning_policy.md:54 +msgid "Approximately 2-3 minor versions" +msgstr "大约 2-3 个小版本" + +#: ../../source/community/versioning_policy.md:54 +msgid "Bugfixes received; releases produced; CI commitment" +msgstr "接收错误修复;生成发布版本;承诺持续集成(CI)" + +#: ../../source/community/versioning_policy.md:54 +msgid "Unmaintained" +msgstr "无人维护" + +#: ../../source/community/versioning_policy.md:54 +msgid "Community-interest driven" +msgstr "社区兴趣驱动" + +#: ../../source/community/versioning_policy.md:54 +msgid "Bugfixes received; no releases produced; no CI commitment" +msgstr "接收错误修复;不生成发布版本;不承诺持续集成(CI)" + +#: ../../source/community/versioning_policy.md:54 +msgid "End of Life (EOL)" +msgstr "生命周期结束(EOL)" + +#: ../../source/community/versioning_policy.md:54 +msgid "N/A" +msgstr "不适用" + +#: ../../source/community/versioning_policy.md:54 +msgid "Branch no longer accepting changes" +msgstr "该分支不再接受更改" + +#: ../../source/community/versioning_policy.md:121 +msgid "Branch states" +msgstr "分支状态" + +#: ../../source/community/versioning_policy.md:123 +msgid "" +"Note that vLLM Ascend will only be released for a certain vLLM release " +"version, not for every version. Hence, you may notice that some versions " +"have corresponding dev branches (e.g. `releases/v0.13.0`), while others " +"do not (e.g. `releases/v0.12.0`). The vLLM Ascend release branch now " +"follows the `releases/vX.Y.Z` naming convention, replacing the previous " +"`vX.Y.Z-dev` format to align with vLLM's branch naming standards." 
+msgstr "请注意,vLLM Ascend 仅针对特定的 vLLM 发布版本进行发布,而非每个版本。因此,您可能会注意到某些版本有对应的开发分支(例如 `releases/v0.13.0`),而其他版本则没有(例如 `releases/v0.12.0`)。vLLM Ascend 的发布分支现在遵循 `releases/vX.Y.Z` 命名约定,取代了之前的 `vX.Y.Z-dev` 格式,以与 vLLM 的分支命名标准保持一致。" + +#: ../../source/community/versioning_policy.md:125 +msgid "" +"Usually, each minor version of vLLM (such as 0.7) corresponds to a vLLM " +"Ascend version branch and supports its latest version (such as 0.7.3), as" +" shown below:" +msgstr "通常,vLLM 的每个小版本(例如 0.7)都对应一个 vLLM Ascend 版本分支,并支持其最新版本(例如 0.7.3),如下所示:" + +#: ../../source/community/versioning_policy.md:54 +msgid "State" +msgstr "状态" + +#: ../../source/community/versioning_policy.md:54 +msgid "Note" +msgstr "注释" + +#: ../../source/community/versioning_policy.md:54 +msgid "CI commitment for vLLM main branch and vLLM 0.16.0 tag" +msgstr "对 vLLM 主分支和 vLLM 0.16.0 标签的 CI 承诺" + +#: ../../source/community/versioning_policy.md:54 +msgid "releases/v0.13.0" +msgstr "releases/v0.13.0" + +#: ../../source/community/versioning_policy.md:54 +msgid "CI commitment for vLLM 0.13.0 version" +msgstr "对 vLLM 0.13.0 版本的 CI 承诺" + +#: ../../source/community/versioning_policy.md:54 +msgid "v0.11.0-dev" +msgstr "v0.11.0-dev" + +#: ../../source/community/versioning_policy.md:54 +msgid "CI commitment for vLLM 0.11.0 version" +msgstr "对 vLLM 0.11.0 版本的 CI 承诺" + +#: ../../source/community/versioning_policy.md:54 msgid "v0.9.1-dev" msgstr "v0.9.1-dev" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "CI commitment for vLLM 0.9.1 version" -msgstr "vLLM 0.9.1 版本的 CI 承诺" +msgstr "对 vLLM 0.9.1 版本的 CI 承诺" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "v0.7.3-dev" msgstr "v0.7.3-dev" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "CI commitment for vLLM 0.7.3 version" -msgstr "vLLM 0.7.3 版本的 CI 承诺" +msgstr "对 vLLM 0.7.3 版本的 CI 承诺" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "v0.7.1-dev" msgstr "v0.7.1-dev" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "Replaced by v0.7.3-dev" msgstr "已被 v0.7.3-dev 替代" -#: ../../community/versioning_policy.md:84 +#: ../../source/community/versioning_policy.md:136 +msgid "Feature branches" +msgstr "特性分支" + +#: ../../source/community/versioning_policy.md:54 +msgid "RFC Link" +msgstr "RFC 链接" + +#: ../../source/community/versioning_policy.md:54 +msgid "Scheduled Merge Time" +msgstr "计划合并时间" + +#: ../../source/community/versioning_policy.md:54 +msgid "Mentor" +msgstr "导师" + +#: ../../source/community/versioning_policy.md:54 +msgid "rfc/long_seq_optimization" +msgstr "rfc/long_seq_optimization" + +#: ../../source/community/versioning_policy.md:54 +msgid "" +msgstr "" + +#: ../../source/community/versioning_policy.md:54 +msgid "930" +msgstr "930" + +#: ../../source/community/versioning_policy.md:54 +msgid "wangxiyuan" +msgstr "wangxiyuan" + +#: ../../source/community/versioning_policy.md:142 +msgid "" +"Branch: The feature branch should be created with a prefix `rfc/` " +"followed by the feature name, such as `rfc/feature-name`." +msgstr "分支:特性分支应以 `rfc/` 为前缀,后接特性名称创建,例如 `rfc/feature-name`。" + +#: ../../source/community/versioning_policy.md:143 +msgid "" +"State: The state of the feature branch is `Maintained` until it is merged" +" into the main branch or deleted." 
+msgstr "状态:特性分支的状态为 `Maintained`,直至其被合并到主分支或删除。" + +#: ../../source/community/versioning_policy.md:144 +msgid "" +"RFC Link: The feature branch should be created with a corresponding RFC " +"issue. The creation of a feature branch requires an RFC and approval from" +" at least two maintainers." +msgstr "RFC 链接:特性分支应随对应的 RFC 议题一同创建。创建特性分支需要 RFC 并获得至少两位维护者的批准。" + +#: ../../source/community/versioning_policy.md:145 +msgid "" +"Scheduled Merge Time: The final goal of a feature branch is to be merged " +"into the main branch. If it remains unmerged for more than three months, " +"the mentor maintainer should evaluate whether to delete the branch." +msgstr "计划合并时间:特性分支的最终目标是合并到主分支。如果超过三个月仍未合并,导师维护者应评估是否删除该分支。" + +#: ../../source/community/versioning_policy.md:146 +msgid "" +"Mentor: The mentor should be a vLLM Ascend maintainer who is responsible " +"for the feature branch." +msgstr "导师:导师应为负责该特性分支的 vLLM Ascend 维护者。" + +#: ../../source/community/versioning_policy.md:148 msgid "Backward compatibility" msgstr "向后兼容性" -#: ../../community/versioning_policy.md:86 +#: ../../source/community/versioning_policy.md:150 msgid "" -"For main branch, vLLM Ascend should works with vLLM main branch and latest 1" -" or 2 release version. So to ensure the backward compatibility, we will do " -"the following:" -msgstr "" -"对于主分支,vLLM Ascend 应该与 vLLM 主分支以及最新的 1 或 2 个发布版本兼容。因此,为了确保向后兼容性,我们将执行以下操作:" +"For main branch, vLLM Ascend should work with vLLM main branch and latest" +" 1 or 2 releases. To ensure backward compatibility, do as follows:" +msgstr "对于主分支,vLLM Ascend 应与 vLLM 主分支及最新的 1 或 2 个发布版本兼容。为确保向后兼容性,请按以下步骤操作:" -#: ../../community/versioning_policy.md:87 +#: ../../source/community/versioning_policy.md:152 msgid "" -"Both main branch and target vLLM release is tested by Ascend E2E CI. For " -"example, currently, vLLM main branch and vLLM 0.8.4 are tested now." -msgstr "主分支和目标 vLLM 发行版都经过了 Ascend E2E CI 的测试。例如,目前正在测试 vLLM 主分支和 vLLM 0.8.4。" +"Both main branch and target vLLM release, such as the vLLM main branch " +"and vLLM 0.8.4, are tested by Ascend E2E CI." +msgstr "主分支和目标 vLLM 发布版本(例如 vLLM 主分支和 vLLM 0.8.4)均通过 Ascend E2E CI 进行测试。" -#: ../../community/versioning_policy.md:88 +#: ../../source/community/versioning_policy.md:153 msgid "" -"For code changes, we will make sure that the changes are compatible with the" -" latest 1 or 2 vLLM release version as well. In this case, vLLM Ascend " -"introduced a version check machinism inner the code. It'll check the version" -" of installed vLLM package first to decide which code logic to use. If users" -" hit the `InvalidVersion` error, it sometimes means that they have installed" -" an dev/editable version of vLLM package. In this case, we provide the env " -"variable `VLLM_VERSION` to let users specify the version of vLLM package to " -"use." -msgstr "" -"对于代码更改,我们也会确保这些更改与最新的 1 或 2 个 vLLM 发行版本兼容。在这种情况下,vLLM Ascend " -"在代码中引入了版本检查机制。它会先检查已安装的 vLLM 包的版本,然后决定使用哪段代码逻辑。如果用户遇到 `InvalidVersion` " -"错误,这有时意味着他们安装了 dev/可编辑版本的 vLLM 包。此时,我们提供了环境变量 `VLLM_VERSION`,让用户可以指定要使用的 " -"vLLM 包版本。" +"To make sure that code changes are compatible with the latest 1 or 2 vLLM" +" releases, vLLM Ascend introduces a version check mechanism inside the " +"code. It checks the version of the installed vLLM package first to decide" +" which code logic to use. If users hit the `InvalidVersion` error, it may" +" indicate that they have installed a dev or editable version of vLLM " +"package. 
In this case, we provide the env variable `VLLM_VERSION` to let " +"users specify the version of vLLM package to use." +msgstr "为确保代码更改与最新的 1 或 2 个 vLLM 发布版本兼容,vLLM Ascend 在代码中引入了版本检查机制。它首先检查已安装的 vLLM 包的版本,以决定使用哪段代码逻辑。如果用户遇到 `InvalidVersion` 错误,可能表明他们安装了开发版或可编辑版本的 vLLM 包。在这种情况下,我们提供了环境变量 `VLLM_VERSION`,允许用户指定要使用的 vLLM 包版本。" -#: ../../community/versioning_policy.md:89 +#: ../../source/community/versioning_policy.md:154 msgid "" -"For documentation changes, we will make sure that the changes are compatible" -" with the latest 1 or 2 vLLM release version as well. Note should be added " -"if there are any breaking changes." -msgstr "对于文档更改,我们会确保这些更改也兼容于最新的1个或2个 vLLM 发布版本。如果有任何重大变更,应添加说明。" +"Document changes should be compatible with the latest 1 or 2 vLLM " +"releases. Notes should be added if there are any breaking changes." +msgstr "文档更改应与最新的 1 或 2 个 vLLM 发布版本兼容。如有任何破坏性变更,应添加说明。" -#: ../../community/versioning_policy.md:91 -msgid "Document Branch Policy" -msgstr "文档分支政策" +#: ../../source/community/versioning_policy.md:156 +msgid "Document branch policy" +msgstr "文档分支策略" -#: ../../community/versioning_policy.md:92 +#: ../../source/community/versioning_policy.md:158 msgid "" "To reduce maintenance costs, **all branch documentation content should " -"remain consistent, and version differences can be controlled via variables " -"in [docs/source/conf.py](https://github.com/vllm-project/vllm-" -"ascend/blob/main/docs/source/conf.py)**. While this is not a simple task, it" -" is a principle we should strive to follow." -msgstr "" -"为了减少维护成本,**所有分支的文档内容应保持一致,版本差异可以通过 " -"[docs/source/conf.py](https://github.com/vllm-project/vllm-" -"ascend/blob/main/docs/source/conf.py) 中的变量进行控制**。虽然这并非易事,但这是我们应当努力遵循的原则。" +"remain consistent, and version differences can be controlled via " +"variables in [docs/source/conf.py](https://github.com/vllm-project/vllm-" +"ascend/blob/main/docs/source/conf.py)**. While this is not a simple task," +" it is a principle we should strive to follow." 
+msgstr "为降低维护成本,**所有分支的文档内容应保持一致,版本差异可通过 [docs/source/conf.py](https://github.com/vllm-project/vllm-ascend/blob/main/docs/source/conf.py) 中的变量进行控制**。虽然这并非易事,但这是我们应努力遵循的原则。" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "Version" msgstr "版本" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "Purpose" msgstr "用途" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "Code Branch" msgstr "代码分支" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "latest" msgstr "最新" -#: ../../community/versioning_policy.md -msgid "Doc for the latest dev branch" -msgstr "最新开发分支的文档" +#: ../../source/community/versioning_policy.md:54 +msgid "Doc for the latest rc release of main branch" +msgstr "主分支最新候选发布版本的文档" -#: ../../community/versioning_policy.md -msgid "vX.Y.Z-dev (Will be `main` after the first final release)" -msgstr "vX.Y.Z-dev(在第一个正式版本发布后将成为 `main`)" +#: ../../source/community/versioning_policy.md:54 +msgid "`main` branch" +msgstr "`main` 分支" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 +msgid "rc version" +msgstr "候选版本" + +#: ../../source/community/versioning_policy.md:54 +msgid "Doc for RC released versions" +msgstr "候选发布版本的文档" + +#: ../../source/community/versioning_policy.md:54 +msgid "`vX.Y.ZrcN` --> `vX.Y.ZrcN` tag" +msgstr "`vX.Y.ZrcN` --> `vX.Y.ZrcN` 标签" + +#: ../../source/community/versioning_policy.md:54 msgid "version" msgstr "版本" -#: ../../community/versioning_policy.md +#: ../../source/community/versioning_policy.md:54 msgid "Doc for historical released versions" -msgstr "历史版本文档" +msgstr "历史发布版本的文档" -#: ../../community/versioning_policy.md -msgid "Git tags, like vX.Y.Z[rcN]" -msgstr "Git 标签,如 vX.Y.Z[rcN]" +#: ../../source/community/versioning_policy.md:54 +msgid "`vX.Y.Z` --> `releases/vX.Y.Z` branch" +msgstr "`vX.Y.Z` --> `releases/vX.Y.Z` 分支" -#: ../../community/versioning_policy.md -msgid "stable(not yet released)" -msgstr "稳定版(尚未发布)" +#: ../../source/community/versioning_policy.md:166 +msgid "Notes:" +msgstr "注释:" -#: ../../community/versioning_policy.md -msgid "Doc for latest final release branch" -msgstr "最新正式发布分支的文档" +#: ../../source/community/versioning_policy.md:168 +msgid "`latest` documentation: always points to latest rc release of main branch." +msgstr "`latest` 文档:始终指向主分支的最新 rc 版本。" -#: ../../community/versioning_policy.md -msgid "Will be `vX.Y.Z-dev` after the first official release" -msgstr "首个正式发布后将会是 `vX.Y.Z-dev`" +#: ../../source/community/versioning_policy.md:169 +msgid "`rc version` documentation: there are no further updates after release." +msgstr "`rc version` 文档:发布后不再进行更新。" -#: ../../community/versioning_policy.md:100 -msgid "As shown above:" -msgstr "如上所示:" - -#: ../../community/versioning_policy.md:102 +#: ../../source/community/versioning_policy.md:170 msgid "" -"`latest` documentation: Matches the current maintenance branch `vX.Y.Z-dev` " -"(Will be `main` after the first final release). Continuously updated to " -"ensure usability for the latest release." -msgstr "" -"`latest` 文档:匹配当前维护分支 `vX.Y.Z-dev`(在首次正式发布后将为 `main`)。持续更新,以确保适用于最新发布版本。" +"`version` documentation: keep updating the `releases/vX.Y.Z` branch " +"documentation to fix doc bugs." 
+msgstr "`version` 文档:持续更新 `releases/vX.Y.Z` 分支的文档以修复文档错误。" -#: ../../community/versioning_policy.md:103 -msgid "" -"`version` documentation: Corresponds to specific released versions (e.g., " -"`v0.7.3`, `v0.7.3rc1`). No further updates after release." -msgstr "`version` 文档:对应特定的已发布版本(例如,`v0.7.3`、`v0.7.3rc1`)。发布后不再进行更新。" - -#: ../../community/versioning_policy.md:104 -msgid "" -"`stable` documentation (**not yet released**): Official release " -"documentation. Updates are allowed in real-time after release, typically " -"based on vX.Y.Z-dev. Once stable documentation is available, non-stable " -"versions should display a header warning: `You are viewing the latest " -"developer preview docs. Click here to view docs for the latest stable " -"release.`." -msgstr "" -"`stable` 文档(**尚未发布**):官方发布版文档。发布后允许实时更新,通常基于 " -"vX.Y.Z-dev。一旦稳定版文档可用,非稳定版本应显示一个顶部警告:`您正在查看最新的开发预览文档。点击此处查看最新稳定版本文档。`" - -#: ../../community/versioning_policy.md:106 -msgid "Software Dependency Management" +#: ../../source/community/versioning_policy.md:172 +msgid "Software dependency management" msgstr "软件依赖管理" -#: ../../community/versioning_policy.md:107 +#: ../../source/community/versioning_policy.md:174 msgid "" "`torch-npu`: Ascend Extension for PyTorch (torch-npu) releases a stable " "version to [PyPi](https://pypi.org/project/torch-npu) every 3 months, a " -"development version (aka the POC version) every month, and a nightly version" -" every day. The PyPi stable version **CAN** be used in vLLM Ascend final " -"version, the monthly dev version **ONLY CANN** be used in vLLM Ascend RC " -"version for rapid iteration, the nightly version **CANNOT** be used in vLLM " -"Ascend any version and branches." +"development version (aka the POC version) every month, and a nightly " +"version every day. The PyPi stable version **CAN** be used in vLLM Ascend" +" final version, the monthly dev version **ONLY CAN** be used in vLLM " +"Ascend RC version for rapid iteration, and the nightly version **CANNOT**" +" be used in vLLM Ascend any version or branch." msgstr "" -"`torch-npu`:Ascend Extension for PyTorch(torch-npu)每 3 个月会在 " -"[PyPi](https://pypi.org/project/torch-npu) 上发布一个稳定版本,每个月发布一个开发版本(即 POC " -"版本),每天发布一个 nightly 版本。PyPi 上的稳定版本**可以**用于 vLLM Ascend 的正式版本,月度开发版本**只能**用于 " -"vLLM Ascend 的 RC(候选发布)版本以便快速迭代,nightly 版本**不能**用于 vLLM Ascend 的任何版本和分支。" +"`torch-npu`:Ascend Extension for PyTorch(torch-npu)每 3 个月在 " +"[PyPi](https://pypi.org/project/torch-npu) 发布一个稳定版本,每月发布一个开发版本(亦称 POC 版本),每日发布一个 " +"nightly 版本。PyPi 稳定版本**可以**用于 vLLM Ascend 正式版,月度开发版本**仅能**用于 vLLM Ascend RC " +"版本以进行快速迭代,nightly 版本**不能**用于 vLLM Ascend 的任何版本或分支。" + +#~ msgid "MindIE Turbo" +#~ msgstr "MindIE Turbo" + +#~ msgid "2.0rc1" +#~ msgstr "2.0候选版本1" + +#~ msgid "The branch status will be in one of the following states:" +#~ msgstr "分支状态将处于以下几种状态之一:" + +#~ msgid "" +#~ "Note that vLLM Ascend will only be" +#~ " released for a certain vLLM release" +#~ " version rather than all versions. " +#~ "Hence, You might see only part of" +#~ " versions have dev branches (such as" +#~ " only `0.7.1-dev` / `0.7.3-dev` but " +#~ "no `0.7.2-dev`), this is as expected." 
+#~ msgstr "" +#~ "请注意,vLLM Ascend 仅会针对特定的 vLLM " +#~ "发布版本进行发布,而非所有版本。因此,您可能只会看到部分版本拥有开发分支(例如仅有 `0.7.1-dev` / `0.7.3-dev`,而没有 " +#~ "`0.7.2-dev`),这是正常现象。" + +#~ msgid "Doc for the latest dev branch" +#~ msgstr "最新开发分支的文档" + +#~ msgid "vX.Y.Z-dev (Will be `main` after the first final release)" +#~ msgstr "vX.Y.Z-dev(在首次正式发布后将成为 `main`)" + +#~ msgid "Git tags, like vX.Y.Z[rcN]" +#~ msgstr "Git 标签,如 vX.Y.Z[rcN]" + +#~ msgid "stable(not yet released)" +#~ msgstr "稳定版(尚未发布)" + +#~ msgid "Will be `vX.Y.Z-dev` after the first official release" +#~ msgstr "首次正式发布后将会是 `vX.Y.Z-dev`" + +#~ msgid "As shown above:" +#~ msgstr "如上所示:" + +#~ msgid "" +#~ "`latest` documentation: Matches the current" +#~ " maintenance branch `vX.Y.Z-dev` (Will be" +#~ " `main` after the first final " +#~ "release). Continuously updated to ensure " +#~ "usability for the latest release." +#~ msgstr "`latest` 文档:匹配当前维护分支 `vX.Y.Z-dev`(在首次正式发布后将成为 `main`)。持续更新以确保适用于最新发布版本。" + +#~ msgid "" +#~ "`stable` documentation (**not yet released**):" +#~ " Official release documentation. Updates " +#~ "are allowed in real-time after " +#~ "release, typically based on vX.Y.Z-dev. " +#~ "Once stable documentation is available, " +#~ "non-stable versions should display a " +#~ "header warning: `You are viewing the " +#~ "latest developer preview docs. Click " +#~ "here to view docs for the latest" +#~ " stable release.`." +#~ msgstr "" +#~ "`stable` 文档(**尚未发布**):官方发布版文档。发布后允许实时更新,通常基于 " +#~ "vX.Y.Z-dev。一旦稳定版文档可用,非稳定版本应显示一个顶部警告:`您正在查看最新的开发预览文档。点击此处查看最新稳定版本文档。`" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ACL_Graph.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ACL_Graph.po new file mode 100644 index 00000000..5337dee9 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ACL_Graph.po @@ -0,0 +1,283 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:1 +msgid "ACL Graph" +msgstr "ACL 图" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:3 +msgid "Why do we need ACL Graph?" +msgstr "为什么需要 ACL 图?" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:5 +msgid "" +"In LLM inference, each token requires nearly a thousand operator " +"executions. When host launching operators are slower than device, it will" +" cause host bound. In severe cases, the device will be idle for more than" +" half of the time. To solve this problem, we use graph in LLM inference." +msgstr "" +"在 LLM 推理中,每个 token 需要执行近千次算子。当主机(host)启动算子的速度慢于设备(device)时,会导致主机瓶颈(host bound)。在严重情况下,设备超过一半的时间将处于空闲状态。为了解决这个问题,我们在 LLM 推理中使用图(graph)。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:26 +msgid "How to use ACL Graph?" +msgstr "如何使用 ACL 图?" 
+ +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:28 +msgid "" +"ACL Graph is enabled by default in V1 Engine, you just need to check that" +" `enforce_eager` is not set to `True`. More details see: [Graph Mode " +"Guide](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html)" +msgstr "" +"ACL 图在 V1 引擎中默认启用,您只需确认 `enforce_eager` 未设置为 `True`。更多详情请参阅:[图模式指南](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html)" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:30 +msgid "How it works?" +msgstr "工作原理" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:32 +msgid "" +"In short, graph mode works in two steps: **capture and replay**. When the" +" engine starts, we capture all of the ops in the model forward and save " +"it as a graph. When a request comes in, we just replay the graph on the " +"device and wait for the result." +msgstr "" +"简而言之,图模式分两步工作:**捕获(capture)和重放(replay)**。当引擎启动时,我们捕获模型前向传播中的所有算子并将其保存为一个图。当请求到达时,我们只需在设备上重放该图并等待结果。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:34 +msgid "But in reality, graph mode is not that simple." +msgstr "但实际上,图模式并非如此简单。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:36 +msgid "Padding and Bucketing" +msgstr "填充与分桶" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:38 +msgid "" +"Due to the fact that a graph can only replay the ops captured before, " +"without doing tiling and checking graph input, we need to ensure the " +"consistency of the graph input. However, we know that the model input's " +"shape depends on the request scheduled by the Scheduler, so we can't " +"ensure consistency." +msgstr "" +"由于图只能重放之前捕获的算子,而不会进行分片(tiling)或检查图输入,因此我们需要确保图输入的一致性。然而,我们知道模型输入的形状取决于调度器(Scheduler)安排的请求,因此无法保证一致性。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:40 +msgid "" +"Obviously, we can solve this problem by capturing the biggest shape and " +"padding all of the model inputs to it. But this will bring a lot of " +"redundant computing and make performance worse. So we can capture " +"multiple graphs with different shapes, and pad the model input to the " +"nearest graph, which will greatly reduce redundant computing. But when " +"`max_num_batched_tokens` is very large, the number of graphs that need to" +" be captured will also become very large. We know that when the input " +"tensor's shape is large, the computing time will be very long, and graph " +"mode is not necessary in this case. 
So all of the things we need to do " +"are:" +msgstr "" +"显然,我们可以通过捕获最大形状并将所有模型输入填充到该形状来解决此问题。但这会带来大量冗余计算并使性能变差。因此,我们可以捕获多个不同形状的图,并将模型输入填充到最接近的图,这将大大减少冗余计算。但当 `max_num_batched_tokens` 非常大时,需要捕获的图数量也会变得非常大。我们知道,当输入张量的形状很大时,计算时间会很长,在这种情况下图模式并非必要。因此,我们需要做的所有事情是:" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:42 +msgid "Set a threshold;" +msgstr "设置一个阈值;" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:43 +msgid "" +"When `num_scheduled_tokens` is bigger than the threshold, use " +"`eager_mode`;" +msgstr "当 `num_scheduled_tokens` 大于阈值时,使用 `eager_mode`;" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:44 +msgid "Capture multiple graphs within a range below the threshold;" +msgstr "在低于阈值的范围内捕获多个图;" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:59 +msgid "Piecewise and Full graph" +msgstr "分段图与完整图" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:61 +msgid "" +"Due to the increasing complexity of the attention layer in current LLMs, " +"we can't ensure all types of attention can run in graph. In MLA, " +"prefill_tokens and decode_tokens have different calculation methods, so " +"when a batch has both prefills and decodes in MLA, graph mode is " +"difficult to handle this situation." +msgstr "" +"由于当前 LLM 中注意力层的复杂性不断增加,我们无法确保所有类型的注意力都能在图模式下运行。在 MLA 中,prefill_tokens 和 decode_tokens 有不同的计算方法,因此当 MLA 中的一个批次同时包含预填充和解码时,图模式难以处理这种情况。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:63 +msgid "" +"vLLM solves this problem with piecewise graph mode. We use eager mode to " +"launch attention's ops, and use graph to deal with others. But this also " +"brings some problems: The cost of launching ops has become large again. " +"Although much smaller than eager mode, it will also lead to host bound " +"when the CPU is poor or `num_tokens` is small." +msgstr "" +"vLLM 通过分段图模式解决了这个问题。我们使用 eager 模式来启动注意力算子,并使用图来处理其他算子。但这也会带来一些问题:启动算子的开销再次变大。虽然比 eager 模式小得多,但当 CPU 性能较差或 `num_tokens` 较小时,仍会导致主机瓶颈。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:65 +msgid "Altogether, we need to support both piecewise and full graph mode." +msgstr "总之,我们需要同时支持分段图和完整图模式。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:67 +msgid "" +"When attention can run in graph, we tend to choose full graph mode to " +"achieve optimal performance;" +msgstr "当注意力可以在图中运行时,我们倾向于选择完整图模式以获得最佳性能;" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:68 +msgid "When full graph does not work, use piecewise graph as a substitute;" +msgstr "当完整图无法工作时,使用分段图作为替代;" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:69 +msgid "" +"When piecewise graph's performance is not good and full graph mode is " +"blocked, separate prefills and decodes, and use full graph mode in " +"**decode_only** situations. Because when a batch includes prefill " +"requests, usually `num_tokens` will be quite big and not cause host " +"bound." +msgstr "" +"当分段图性能不佳且完整图模式受阻时,将预填充和解码分离,并在 **decode_only** 情况下使用完整图模式。因为当一个批次包含预填充请求时,通常 `num_tokens` 会相当大,不会导致主机瓶颈。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:71 +msgid "" +"Currently, due to stream resource constraint, we can only support a few " +"buckets in piecewise graph mode now, which will cause redundant computing" +" and may lead to performance degradation compared with eager mode." 
+msgstr "" +"目前,由于流资源限制,我们现在只能在分段图模式下支持少数几个桶(buckets),这会导致冗余计算,并且与 eager 模式相比可能导致性能下降。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:73 +msgid "How is it implemented?" +msgstr "如何实现?" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:75 +msgid "" +"vLLM has already implemented most of the modules in graph mode. You can " +"see more details at: [CUDA " +"Graphs](https://docs.vllm.ai/en/latest/design/cuda_graphs.html)" +msgstr "" +"vLLM 已经在图模式下实现了大部分模块。您可以在以下链接查看更多详情:[CUDA 图](https://docs.vllm.ai/en/latest/design/cuda_graphs.html)" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:77 +msgid "" +"When in graph mode, vLLM will call " +"`current_platform.get_static_graph_wrapper_cls` to get the current " +"device's graph model wrapper, so what we need to do is implement the " +"graph mode wrapper on Ascend: `ACLGraphWrapper`." +msgstr "" +"在图模式下,vLLM 会调用 `current_platform.get_static_graph_wrapper_cls` 来获取当前设备的图模型包装器,因此我们需要做的是在 Ascend 上实现图模式包装器:`ACLGraphWrapper`。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:79 +msgid "" +"vLLM has added `support_torch_compile` decorator to all models. This " +"decorator will replace the `__init__` and `forward` interface of the " +"model class. When `forward` is called, the code inside the " +"`ACLGraphWrapper` will be executed, and it will do capture or replay as " +"mentioned above." +msgstr "" +"vLLM 已为所有模型添加了 `support_torch_compile` 装饰器。此装饰器将替换模型类的 `__init__` 和 `forward` 接口。当调用 `forward` 时,`ACLGraphWrapper` 内部的代码将被执行,并执行如上所述的捕获或重放操作。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:81 +msgid "" +"When using piecewise graph, we just need to follow the above-mentioned " +"process. But when in full graph, due to the complexity of the attention, " +"sometimes we need to update attention op's params before execution. So we" +" implement `update_attn_params` and `update_mla_attn_params` functions " +"for full graph mode. During forward, memory will be reused between " +"different ops, so we can't update attention op's params before forward. " +"In ACL Graph, we use `torch.npu.graph_task_update_begin` and " +"`torch.npu.graph_task_update_end` to do it, and use " +"`torch.npu.ExternalEvent` to ensure order between param updates and op " +"executions." +msgstr "" +"使用分段图时,我们只需遵循上述流程。但在完整图模式下,由于注意力的复杂性,有时我们需要在执行前更新注意力算子的参数。因此,我们为完整图模式实现了 `update_attn_params` 和 `update_mla_attn_params` 函数。在前向传播期间,内存会在不同算子之间重用,因此我们无法在前向传播之前更新注意力算子的参数。在 ACL 图中,我们使用 `torch.npu.graph_task_update_begin` 和 `torch.npu.graph_task_update_end` 来实现这一点,并使用 `torch.npu.ExternalEvent` 来确保参数更新与算子执行之间的顺序。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:83 +msgid "DFX" +msgstr "DFX" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:85 +msgid "Stream resource constraint" +msgstr "流资源限制" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:87 +msgid "" +"Currently, we can only capture 1800 graphs at most, due to the limitation" +" of ACL graph that a graph requires at least a separate stream. This " +"number is bounded by the number of streams, which is 2048; we save 248 " +"streams as a buffer. Besides, there are many variables that can affect " +"the number of buckets:" +msgstr "" +"目前,由于 ACL 图的限制(一个图至少需要一个独立的流),我们最多只能捕获 1800 个图。这个数字受限于流的数量,即 2048;我们保留 248 个流作为缓冲区。此外,还有许多变量会影响桶的数量:" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:89 +msgid "" +"Piecewise graph divides the model into `num_hidden_layers + 1` sub " +"modules, based on the attention layer. 
Every sub module is a single graph" +" which needs to cost a stream, so the number of buckets in piecewise " +"graph mode is very tight compared with full graph mode." +msgstr "" +"分段图根据注意力层将模型划分为 `num_hidden_layers + 1` 个子模块。每个子模块都是一个单独的图,需要消耗一个流,因此与完整图模式相比,分段图模式下的桶数量非常紧张。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:91 +msgid "" +"The number of streams required for a graph is related to the number of " +"comm domains. Each comm domain will increase one stream consumed by a " +"graph." +msgstr "一个图所需的流数量与通信域(comm domain)的数量有关。每个通信域都会增加一个图消耗的流。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:93 +msgid "" +"When multi-stream is explicitly called in a sub module, it will consume " +"an additional stream." +msgstr "当在子模块中显式调用多流(multi-stream)时,它将消耗一个额外的流。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:95 +msgid "" +"There are some other rules about ACL Graph and stream. Currently, we use " +"func `update_aclgraph_sizes` to calculate the maximum number of buckets " +"and update `graph_batch_sizes` to ensure stream resource is sufficient." +msgstr "" +"关于 ACL 图和流还有一些其他规则。目前,我们使用函数 `update_aclgraph_sizes` 来计算最大桶数并更新 `graph_batch_sizes`,以确保流资源充足。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:97 +msgid "We will expand the stream resource limitation in the future." +msgstr "我们将在未来扩展流资源限制。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:99 +msgid "Limitations" +msgstr "限制" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:101 +msgid "`FULL` and `FULL_AND_PIECEWISE` are not supported now;" +msgstr "目前不支持 `FULL` 和 `FULL_AND_PIECEWISE`;" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:102 +msgid "" +"When use ACL Graph and MTP and `num_speculative_tokens > 1`, as vLLM " +"don't support this case in v0.11.0, we need to set " +"`cudagraph_capture_sizes` explicitly." +msgstr "" +"当使用 ACL 图和 MTP 且 `num_speculative_tokens > 1` 时,由于 vLLM 在 v0.11.0 中不支持此情况,我们需要显式设置 `cudagraph_capture_sizes`。" + +#: ../../source/developer_guide/Design_Documents/ACL_Graph.md:103 +msgid "`use_inductor` is not supported now;" +msgstr "目前不支持 `use_inductor`;" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po new file mode 100644 index 00000000..48fc20c5 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po @@ -0,0 +1,309 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:1 +msgid "KV Cache Pool" +msgstr "KV 缓存池" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:3 +msgid "Why KV Cache Pool?" +msgstr "为什么需要 KV 缓存池?" 
+ +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:5 +msgid "" +"Prefix caching is an important feature in LLM inference that can reduce " +"prefill computation time drastically." +msgstr "前缀缓存是大语言模型推理中的一项重要特性,可以显著减少预填充计算时间。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:7 +msgid "" +"However, the performance gain from prefix caching is highly dependent on " +"the cache hit rate, while the cache hit rate can be limited if one only " +"uses HBM for KV cache storage." +msgstr "然而,前缀缓存带来的性能提升高度依赖于缓存命中率,而如果仅使用 HBM 存储 KV 缓存,缓存命中率会受到限制。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:9 +msgid "" +"Hence, KV Cache Pool is proposed to utilize various types of storage " +"including HBM, DRAM, and SSD, making a pool for KV Cache storage while " +"making the prefix of requests visible across all nodes, increasing the " +"cache hit rate for all requests." +msgstr "因此,我们提出了 KV 缓存池,旨在利用包括 HBM、DRAM 和 SSD 在内的多种存储类型,构建一个 KV 缓存存储池,同时使请求的前缀在所有节点间可见,从而提高所有请求的缓存命中率。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:11 +msgid "" +"vLLM Ascend currently supports [MooncakeStore](https://github.com" +"/kvcache-ai/Mooncake), one of the most recognized KV Cache storage " +"engines." +msgstr "vLLM Ascend 目前支持 [MooncakeStore](https://github.com/kvcache-ai/Mooncake),这是最受认可的 KV 缓存存储引擎之一。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:13 +msgid "" +"While one can utilize Mooncake Store in vLLM V1 engine by setting it as a" +" remote backend of LMCache with GPU (see " +"[Tutorial](https://github.com/LMCache/LMCache/blob/dev/examples/kv_cache_reuse/remote_backends/mooncakestore/README.md))," +" we find it would be better to integrate a connector that directly " +"supports Mooncake Store and can utilize the data transfer strategy that " +"best fits Huawei NPU hardware." +msgstr "虽然可以通过将 Mooncake Store 设置为 GPU 上 LMCache 的远程后端来在 vLLM V1 引擎中使用它(参见[教程](https://github.com/LMCache/LMCache/blob/dev/examples/kv_cache_reuse/remote_backends/mooncakestore/README.md)),但我们认为集成一个直接支持 Mooncake Store 并能利用最适合华为 NPU 硬件的数据传输策略的连接器会更好。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:15 +msgid "" +"Hence, we propose to integrate Mooncake Store with a brand new " +"**MooncakeStoreConnectorV1**, which is indeed largely inspired by " +"**LMCacheConnectorV1** (see the `How is MooncakeStoreConnectorV1 " +"Implemented?` section)." +msgstr "因此,我们提议将 Mooncake Store 与全新的 **MooncakeStoreConnectorV1** 集成,该连接器的设计在很大程度上受到了 **LMCacheConnectorV1** 的启发(参见 `MooncakeStoreConnectorV1 是如何实现的?` 部分)。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:17 +msgid "Usage" +msgstr "使用方法" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:19 +msgid "" +"vLLM Ascend currently supports Mooncake Store for KV Cache Pool. To " +"enable Mooncake Store, one needs to configure `kv-transfer-config` and " +"choose `MooncakeStoreConnector` as the KV Connector." +msgstr "vLLM Ascend 目前支持使用 Mooncake Store 作为 KV 缓存池。要启用 Mooncake Store,需要配置 `kv-transfer-config` 并选择 `MooncakeStoreConnector` 作为 KV 连接器。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:21 +msgid "" +"For step-by-step deployment and configuration, please refer to the [KV " +"Pool User " +"Guide](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/kv_pool.html)." 
+msgstr "关于逐步部署和配置,请参考 [KV 池用户指南](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/kv_pool.html)。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:23 +msgid "How it works?" +msgstr "工作原理" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:25 +msgid "" +"The KV Cache Pool integrates multiple memory tiers (HBM, DRAM, SSD, etc.)" +" through a connector-based architecture." +msgstr "KV 缓存池通过基于连接器的架构,整合了多个内存层级(HBM、DRAM、SSD 等)。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:27 +msgid "" +"Each connector implements a unified interface for storing, retrieving, " +"and transferring KV blocks between tiers, depending on access frequency " +"and hardware bandwidth." +msgstr "每个连接器实现了一个统一的接口,用于根据访问频率和硬件带宽在不同层级之间存储、检索和传输 KV 块。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:29 +msgid "" +"When combined with vLLM’s Prefix Caching mechanism, the pool enables " +"efficient caching both locally (in HBM) and globally (via Mooncake), " +"ensuring that frequently used prefixes remain hot while less frequently " +"accessed KV data can spill over to lower-cost memory." +msgstr "当与 vLLM 的前缀缓存机制结合时,该池能够实现本地(HBM 中)和全局(通过 Mooncake)的高效缓存,确保常用前缀保持热状态,而访问频率较低的 KV 数据则可以溢出到成本更低的内存中。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:31 +msgid "1. Combining KV Cache Pool with HBM Prefix Caching" +msgstr "1. 将 KV 缓存池与 HBM 前缀缓存结合" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:33 +msgid "" +"Prefix Caching with HBM is already supported by the vLLM V1 Engine. By " +"introducing KV Connector V1, users can seamlessly combine HBM-based " +"Prefix Caching with Mooncake-backed KV Pool." +msgstr "vLLM V1 引擎已支持基于 HBM 的前缀缓存。通过引入 KV Connector V1,用户可以无缝地将基于 HBM 的前缀缓存与 Mooncake 支持的 KV 池结合起来。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:36 +msgid "" +"The user can enable both features simply by enabling Prefix Caching, " +"which is enabled by default in vLLM V1 unless the " +"`--no_enable_prefix_caching` flag is set, and setting up the KV Connector" +" for KV Pool (e.g., the MooncakeStoreConnector)." +msgstr "用户只需启用前缀缓存(在 vLLM V1 中默认启用,除非设置了 `--no_enable_prefix_caching` 标志)并为 KV 池设置 KV 连接器(例如 MooncakeStoreConnector),即可同时启用这两个功能。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:38 +msgid "**Workflow**:" +msgstr "**工作流程**:" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:40 +msgid "The engine first checks for prefix hits in the HBM cache." +msgstr "引擎首先检查 HBM 缓存中的前缀命中情况。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:42 +msgid "" +"After getting the number of hit tokens on HBM, it queries the KV Pool via" +" the connector. If there are additional hits in the KV Pool, we get the " +"**additional blocks only** from the KV Pool, and get the rest of the " +"blocks directly from HBM to minimize the data transfer latency." +msgstr "获取 HBM 上的命中令牌数量后,引擎通过连接器查询 KV 池。如果在 KV 池中有额外的命中,我们**仅从 KV 池获取额外的块**,其余块则直接从 HBM 获取,以最小化数据传输延迟。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:44 +msgid "" +"After the KV Caches in the KV Pool are loaded into HBM, the remaining " +"process is the same as Prefix Caching in HBM." +msgstr "将 KV 池中的 KV 缓存加载到 HBM 后,剩余过程与 HBM 中的前缀缓存相同。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:46 +msgid "2. Combining KV Cache Pool with Mooncake PD Disaggregation" +msgstr "2. 
将 KV 缓存池与 Mooncake PD 解耦结合" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:48 +msgid "" +"When used together with Mooncake PD (Prefill-Decode) Disaggregation, the " +"KV Cache Pool can further decouple prefill and decode stages across " +"devices or nodes." +msgstr "当与 Mooncake PD(预填充-解码)解耦功能结合使用时,KV 缓存池可以进一步在设备或节点间解耦预填充和解码阶段。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:50 +msgid "" +"Currently, we only perform put and get operations of KV Pool for " +"**Prefill Nodes**, and Decode Nodes get their KV Cache from Mooncake P2P " +"KV Connector, i.e., MooncakeConnector." +msgstr "目前,我们仅对**预填充节点**执行 KV 池的 put 和 get 操作,解码节点则通过 Mooncake P2P KV 连接器(即 MooncakeConnector)获取其 KV 缓存。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:52 +msgid "" +"The key benefit of doing this is that we can keep the gain in performance" +" by computing less with Prefix Caching from HBM and KV Pool for Prefill " +"Nodes, while not sacrificing the data transfer efficiency between Prefill" +" and Decode nodes with P2P KV Connector that transfers KV Caches between " +"NPU devices directly." +msgstr "这样做的主要好处是,我们可以通过为预填充节点使用来自 HBM 和 KV 池的前缀缓存来减少计算量,从而保持性能增益,同时又不牺牲预填充节点与解码节点之间的数据传输效率,因为 P2P KV 连接器直接在 NPU 设备间传输 KV 缓存。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:54 +msgid "" +"To enable this feature, we need to set up both Mooncake Connector and " +"Mooncake Store Connector with a Multi Connector, which is a KV Connector " +"class provided by vLLM that can call multiple KV Connectors in a specific" +" order." +msgstr "要启用此功能,我们需要使用 Multi Connector 来设置 Mooncake Connector 和 Mooncake Store Connector。Multi Connector 是 vLLM 提供的一个 KV 连接器类,可以按特定顺序调用多个 KV 连接器。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:56 +msgid "" +"For details, please also refer to the Mooncake Connector Store Deployment" +" Guide." +msgstr "详情请参阅 Mooncake Connector Store 部署指南。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:58 +msgid "How is MooncakeStoreConnectorV1 Implemented?" +msgstr "MooncakeStoreConnectorV1 是如何实现的?" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:60 +msgid "" +"**MooncakeStoreConnectorV1** inherits the KV Connector V1 class in vLLM " +"V1: through implementing the required methods defined in the KV connector" +" V1 base class, one can integrate a third-party KV cache transfer/storage" +" backend into the vLLM framework." +msgstr "**MooncakeStoreConnectorV1** 继承自 vLLM V1 中的 KV Connector V1 类:通过实现 KV 连接器 V1 基类中定义的必要方法,可以将第三方 KV 缓存传输/存储后端集成到 vLLM 框架中。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:62 +msgid "" +"MooncakeStoreConnectorV1 is also largely inspired by LMCacheConnectorV1 " +"in terms of the `Lookup Engine`/`Lookup Client` design for looking up KV " +"cache keys, and the `ChunkedTokenDatabase` class for processing tokens " +"into prefix-aware hashes as well as other hashing related designs. On top" +" of this, we have also added our own design including `KVTransferThread` " +"that allows async `get` and `put` of KV caches with multi-threading, and " +"NPU-related data transfer optimization such as removing the `LocalBuffer`" +" in LMCache to remove redundant data transfer." 
+msgstr "MooncakeStoreConnectorV1 也在很大程度上借鉴了 LMCacheConnectorV1,包括用于查找 KV 缓存键的 `Lookup Engine`/`Lookup Client` 设计,以及用于将令牌处理为前缀感知哈希的 `ChunkedTokenDatabase` 类和其他哈希相关设计。在此基础上,我们还添加了自己的设计,包括允许通过多线程异步 `get` 和 `put` KV 缓存的 `KVTransferThread`,以及与 NPU 相关的数据传输优化,例如移除 LMCache 中的 `LocalBuffer` 以消除冗余数据传输。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:64 +msgid "" +"The KV Connector methods that need to be implemented can be categorized " +"into scheduler-side methods that are called in V1 scheduler and worker-" +"side methods that are called in V1 worker, namely:" +msgstr "需要实现的 KV 连接器方法可以分为在 V1 调度器中调用的调度器端方法和在 V1 工作器中调用的工作器端方法,即:" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:66 +msgid "KV Connector Scheduler-Side Methods" +msgstr "KV 连接器调度器端方法" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:68 +msgid "" +"`get_num_new_matched_tokens`: Get prefix cache hit in number of tokens " +"through looking up into the KV pool. `update_states_after_alloc`: " +"Update KVConnector state after temporary buffer alloc. " +"`build_connector_meta`: Attach the connector metadata to the request " +"object. `request_finished`: Once a request is finished, determine " +"whether request blocks should be freed now or will be sent asynchronously" +" and freed later." +msgstr "" +"`get_num_new_matched_tokens`:通过查询 KV 池,获取以令牌数表示的前缀缓存命中数。\n" +"`update_states_after_alloc`:临时缓冲区分配后更新 KVConnector 状态。\n" +"`build_connector_meta`:将连接器元数据附加到请求对象。\n" +"`request_finished`:请求完成后,确定请求块是应立即释放,还是将异步发送并稍后释放。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:73 +msgid "Connector Worker-Side Methods" +msgstr "连接器工作器端方法" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:75 +msgid "" +"`register_kv_caches`: Register KV cache buffers needed for KV cache " +"transfer. `start_load_kv`: Perform KV cache load operation that transfers" +" KV cache from storage to device. `wait_for_layer_load`: Optional; Wait " +"for layer load in layerwise + async KV load scenario. `save_kv_layer`: " +"Optional; Do layerwise KV cache put into KV Pool. `wait_for_save`: Wait " +"for KV Save to finish if async KV cache save/put. `get_finished`: Get " +"request that finished KV transfer, `done_sending` if `put` finished, " +"`done_receiving` if `get` finished." +msgstr "" +"`register_kv_caches`:注册 KV 缓存传输所需的 KV 缓存缓冲区。\n" +"`start_load_kv`:执行 KV 缓存加载操作,将 KV 缓存从存储传输到设备。\n" +"`wait_for_layer_load`:可选;在分层 + 异步 KV 加载场景中等待层加载。\n" +"`save_kv_layer`:可选;执行分层 KV 缓存放入 KV 池的操作。\n" +"`wait_for_save`:如果异步保存/放入 KV 缓存,则等待 KV 保存完成。\n" +"`get_finished`:获取已完成 KV 传输的请求,如果 `put` 完成则为 `done_sending`,如果 `get` 完成则为 `done_receiving`。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:82 +msgid "DFX" +msgstr "DFX(可诊断性、可维护性、可服务性)" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:84 +msgid "" +"When looking up a key in KV Pool, if we cannot find the key, there is no " +"Cache Hit for this specific block; we return no hit for this block and do" +" not look up further blocks for the current request." +msgstr "在 KV 池中查找键时,如果找不到该键,则此特定块没有缓存命中;我们返回此块未命中,并且不再为当前请求查找后续块。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:85 +msgid "" +"Similarly, when we are trying to put a block into KV Pool and it fails, " +"we do not put further blocks (subject to change)." 
+msgstr "类似地,当我们尝试将一个块放入 KV 池但失败时,我们不会放入后续块(可能更改)。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:87 +msgid "Limitations" +msgstr "限制" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:89 +msgid "" +"Currently, Mooncake Store for vLLM-Ascend only supports DRAM as the " +"storage for KV Cache pool." +msgstr "目前,vLLM-Ascend 的 Mooncake Store 仅支持 DRAM 作为 KV 缓存池的存储。" + +#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:91 +msgid "" +"For now, if we successfully looked up a key and found it exists, but " +"failed to get it when calling KV Pool's get function, we just output a " +"log indicating the get operation failed and keep going; hence, the " +"accuracy of that specific request may be affected. We will handle this " +"situation by falling back the request and re-compute everything assuming " +"there's no prefix cache hit (or even better, revert only one block and " +"keep using the Prefix Caches before that)." +msgstr "目前,如果我们成功查找到一个键并发现它存在,但在调用 KV 池的 get 函数时失败,我们仅输出一条日志表明 get 操作失败并继续执行;因此,该特定请求的准确性可能会受到影响。我们将通过回退请求并假设没有前缀缓存命中来重新计算所有内容(或者更好的是,仅回退一个块并继续使用该块之前的前缀缓存)来处理这种情况。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po new file mode 100644 index 00000000..ddfff542 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po @@ -0,0 +1,629 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:1 +msgid "Prepare inputs for model forwarding" +msgstr "为模型前向传播准备输入" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:3 +msgid "Purpose" +msgstr "目的" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:5 +msgid "Information required to perform model forward pass:" +msgstr "执行模型前向传播所需的信息:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:7 +msgid "the inputs" +msgstr "输入" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:8 +msgid "the corresponding attention metadata of the inputs" +msgstr "输入对应的注意力元数据" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:10 +msgid "The following diagram shows what we should prepare for model inference." +msgstr "下图展示了我们需要为模型推理准备的内容。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:20 +msgid "" +"Therefore, as long as we have these two pieces of information mentioned " +"above, we can perform the model's forward propagation." 
+msgstr "因此,只要我们拥有上述两方面的信息,就可以执行模型的前向传播。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:22 +msgid "" +"This document will explain **how we obtain the inputs and their " +"corresponding attention metadata**." +msgstr "本文将解释**我们如何获取输入及其对应的注意力元数据**。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:24 +msgid "Overview" +msgstr "概述" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:26 +msgid "1. Obtain inputs" +msgstr "1. 获取输入" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:28 +msgid "The workflow of obtaining inputs:" +msgstr "获取输入的工作流程:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:30 +msgid "" +"Get `token positions`: relative position of each token within its request" +" sequence." +msgstr "获取 `token positions`:每个 token 在其请求序列中的相对位置。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:32 +msgid "Get `token indices`: index of each scheduled token in the token table." +msgstr "获取 `token indices`:每个已调度 token 在 token 表中的索引。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:34 +msgid "" +"Get `Token IDs`: using token indices to retrieve the Token IDs from " +"**token id table**." +msgstr "获取 `Token IDs`:使用 token indices 从 **token id table** 中检索 Token IDs。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:36 +msgid "" +"At last, these `Token IDs` are required to be fed into a model, and " +"`positions` should also be sent into the model to create `Rope` (Rotary " +"positional embedding). Both of them are the inputs of the model." +msgstr "最后,这些 `Token IDs` 需要输入到模型中,`positions` 也需要送入模型以创建 `Rope`(旋转位置编码)。两者共同构成模型的输入。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:38 +msgid "" +"**Note**: The `Token IDs` are the inputs of a model, so we also call them" +" `Inputs IDs`." +msgstr "**注意**:`Token IDs` 是模型的输入,因此我们也称它们为 `Inputs IDs`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:40 +msgid "2. Build inputs attention metadata" +msgstr "2. 构建输入注意力元数据" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:42 +msgid "A model requires these attention metadata during the forward pass:" +msgstr "模型在前向传播过程中需要以下注意力元数据:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:44 +msgid "" +"`query start location`: start and end location of each request " +"corresponding to the scheduled tokens." +msgstr "`query start location`:每个请求对应的已调度 token 的起始和结束位置。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:45 +msgid "" +"`sequence length`: length of each request including both computed tokens " +"and newly scheduled tokens." +msgstr "`sequence length`:每个请求的长度,包括已计算 token 和新调度的 token。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:46 +msgid "`number of computed tokens`: number of computed tokens for each request." +msgstr "`number of computed tokens`:每个请求已计算 token 的数量。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:47 +msgid "`number of requests`: number of requests in this batch." +msgstr "`number of requests`:本批次中的请求数量。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:48 +msgid "`number of tokens`: total number of scheduled tokens in this batch." 
+msgstr "`number of tokens`:本批次中已调度 token 的总数。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:49 +msgid "" +"**`block table`**: translates the logical address (within its sequence) " +"of each block to its global physical address in the device's memory." +msgstr "**`block table`**:将每个块在其序列内的逻辑地址转换为其在设备内存中的全局物理地址。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:50 +msgid "" +"`max query len`: the longest scheduled tokens length in this request " +"batch." +msgstr "`max query len`:本请求批次中最长的已调度 token 长度。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:51 +msgid "" +"`slot mapping`: indices of each token that input token will be stored " +"into." +msgstr "`slot mapping`:输入 token 将被存储到的每个 token 的索引。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:52 +msgid "" +"`attention mask`: mask matrix applied to attention scores before softmax " +"to control which tokens can attend to each other (usually a causal " +"attention)." +msgstr "`attention mask`:在 softmax 之前应用于注意力分数的掩码矩阵,用于控制哪些 token 可以相互关注(通常是因果注意力)。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:54 +msgid "Before start" +msgstr "开始之前" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:56 +msgid "There are mainly three types of variables." +msgstr "主要有三种类型的变量。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:58 +msgid "" +"token level: represents one attribute corresponding to each scheduled " +"token, so the length of this variable is the number of scheduled tokens." +msgstr "token 级别:代表每个已调度 token 对应的一个属性,因此该变量的长度等于已调度 token 的数量。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:59 +msgid "" +"request level: represents one attribute of each scheduled request, whose " +"length usually is the number of scheduled requests. (`query start " +"location` is a special case, which has one more element.)" +msgstr "请求级别:代表每个已调度请求的一个属性,其长度通常等于已调度请求的数量。(`query start location` 是一个特例,它多一个元素。)" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:60 +msgid "system level:" +msgstr "系统级别:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:61 +msgid "" +"**Token IDs table**: stores the token IDs (i.e. the inputs of a model) of" +" each request. The shape of this table is `(max num request, max model " +"len)`. Here, `max num request` is the maximum count of concurrent " +"requests allowed in a forward batch and `max model len` is the maximum " +"token count that can be handled at one request sequence in this model." +msgstr "**Token IDs table**:存储每个请求的 token IDs(即模型的输入)。此表的形状为 `(max num request, max model len)`。其中,`max num request` 是前向批次中允许的最大并发请求数,`max model len` 是该模型中单个请求序列可以处理的最大 token 数量。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:62 +msgid "" +"**Block table**: translates the logical address (within its sequence) of " +"each block to its global physical address in the device's memory. The " +"shape of this table is `(max num request, max model len / block size)`" +msgstr "**Block table**:将每个块在其序列内的逻辑地址转换为其在设备内存中的全局物理地址。此表的形状为 `(max num request, max model len / block size)`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:64 +msgid "" +"**Note**: Both of these two tables come from the `_update_states` method " +"before **preparing inputs**. 
You can take a look if you need more " +"inspiration." +msgstr "**注意**:这两个表都来自 **准备输入** 之前的 `_update_states` 方法。如果需要更多启发,可以查看一下。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:66 +msgid "Tips" +msgstr "提示" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:68 +msgid "" +"Simply put, a `token ID` is an **integer** (usually `int32`), which " +"represents a token. Example of `Token ID`:" +msgstr "简而言之,一个 `token ID` 是一个**整数**(通常是 `int32`),它代表一个 token。`Token ID` 示例:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:88 +msgid "Go through details" +msgstr "深入细节" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:90 +msgid "Assumptions:" +msgstr "假设:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:92 +msgid "maximum number of tokens that can be scheduled at once: 10" +msgstr "一次可调度的最大 token 数:10" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:93 +msgid "`block size`: 2" +msgstr "`block size`:2" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:94 +msgid "" +"Totally schedule 3 requests. Their prompt lengths are 3, 2, and 8 " +"respectively." +msgstr "总共调度 3 个请求。它们的提示长度分别为 3、2 和 8。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:95 +msgid "" +"`max model length`: 12 (the maximum token count that can be handled at " +"one request sequence in a model)." +msgstr "`max model length`:12(模型中单个请求序列可以处理的最大 token 数量)。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:97 +msgid "" +"These assumptions are configured at the beginning when starting vLLM. " +"They are not fixed, so you can manually set them." +msgstr "这些假设是在启动 vLLM 时配置的。它们不是固定的,因此可以手动设置。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:99 +msgid "Step 1: All requests in the prefill phase" +msgstr "步骤 1:所有请求均处于预填充阶段" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:101 +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:213 +msgid "Obtain inputs" +msgstr "获取输入" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:103 +#, python-brace-format +msgid "" +"As the maximum number of tokens that can be scheduled is 10, the " +"scheduled tokens of each request can be represented as `{'0': 3, '1': 2, " +"'2': 5}`. Note that `request_2` uses chunked prefill, leaving 3 prompt " +"tokens unscheduled." +msgstr "由于一次可调度的最大 token 数为 10,每个请求的已调度 token 可以表示为 `{'0': 3, '1': 2, '2': 5}`。注意 `request_2` 使用了分块预填充,留下了 3 个提示 token 未调度。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:105 +msgid "1. Get token positions" +msgstr "1. 获取 token positions" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:107 +msgid "" +"First, determine which request each token belongs to: tokens 0–2 are " +"assigned to **request_0**, tokens 3–4 to **request_1**, and tokens 5–9 to" +" **request_2**. To represent this mapping, we use `request indices`, for " +"example, `request indices`: `[0, 0, 0, 1, 1, 2, 2, 2, 2, 2]`." 
+msgstr "首先,确定每个 token 属于哪个请求:token 0–2 分配给 **request_0**,token 3–4 分配给 **request_1**,token 5–9 分配给 **request_2**。为了表示这种映射,我们使用 `request indices`,例如,`request indices`:`[0, 0, 0, 1, 1, 2, 2, 2, 2, 2]`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:109 +msgid "" +"For each request, use **the number of computed tokens** + **the relative " +"position of current scheduled tokens** (`request_0: [0 + 0, 0 + 1, 0 + " +"2]`, `request_1: [0 + 0, 0 + 1]`, `request_2: [0 + 0, 0 + 1,..., 0 + 4]`)" +" and then concatenate them together (`[0, 1, 2, 0, 1, 0, 1, 2, 3, 4]`)." +msgstr "对于每个请求,使用 **已计算 token 的数量** + **当前调度 token 的相对位置**(`request_0: [0 + 0, 0 + 1, 0 + 2]`,`request_1: [0 + 0, 0 + 1]`,`request_2: [0 + 0, 0 + 1,..., 0 + 4]`),然后将它们连接在一起(`[0, 1, 2, 0, 1, 0, 1, 2, 3, 4]`)。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:111 +msgid "" +"Note: there is a more efficient way (using `request indices`) to create " +"positions in actual code." +msgstr "注意:在实际代码中,有一种更高效的方法(使用 `request indices`)来创建 positions。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:113 +msgid "" +"Finally, `token positions` can be obtained as `[0, 1, 2, 0, 1, 0, 1, 2, " +"3, 4]`. This variable is **token level**." +msgstr "最后,`token positions` 可以获取为 `[0, 1, 2, 0, 1, 0, 1, 2, 3, 4]`。此变量是 **token 级别** 的。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:115 +msgid "2. Get token indices" +msgstr "2. 获取 token indices" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:117 +msgid "" +"The shape of the current **Token IDs table** is `(max num request, max " +"model len)`." +msgstr "当前 **Token IDs table** 的形状为 `(max num request, max model len)`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:119 +msgid "" +"Why are these `T_3_5`, `T_3_6`, `T_3_7` in this table without being " +"scheduled?" +msgstr "为什么表中的 `T_3_5`、`T_3_6`、`T_3_7` 没有被调度?" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:121 +msgid "" +"We fill all Token IDs in one request sequence to this table at once, but " +"we only retrieve the tokens we scheduled this time. Then we retrieve the " +"remaining Token IDs next time." +msgstr "我们将一个请求序列中的所有 Token IDs 一次性填充到此表中,但我们只检索本次调度的 token。然后下次再检索剩余的 Token IDs。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:133 +msgid "Note that `T_x_x` is an `int32`." +msgstr "注意 `T_x_x` 是一个 `int32`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:135 +msgid "" +"Let's say `M = max model len`. Then we can use `token positions` together" +" with `request indices` of each token to construct `token indices`." +msgstr "假设 `M = max model len`。那么我们可以使用 `token positions` 以及每个 token 的 `request indices` 来构造 `token indices`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:137 +msgid "" +"So `token indices` = `[0 + 0 * M, 1 + 0 * M, 2 + 0 * M, 0 + 1 * M, 1 + 1 " +"* M, 0 + 2 * M, 1 + 2 * M, 2 + 2 * M, 3 + 2 * M, 4 + 2 * M]` = `[0, 1, 2," +" 12, 13, 24, 25, 26, 27, 28]`" +msgstr "所以 `token indices` = `[0 + 0 * M, 1 + 0 * M, 2 + 0 * M, 0 + 1 * M, 1 + 1 * M, 0 + 2 * M, 1 + 2 * M, 2 + 2 * M, 3 + 2 * M, 4 + 2 * M]` = `[0, 1, 2, 12, 13, 24, 25, 26, 27, 28]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:139 +msgid "3. Retrieve the Token IDs" +msgstr "3. 
检索 Token IDs" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:141 +msgid "" +"We use `token indices` to select out the corresponding `Input IDs` from " +"the token table. The pseudocode is as follows:" +msgstr "我们使用 `token indices` 从 token 表中选择出对应的 `Input IDs`。伪代码如下:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:147 +msgid "As mentioned before, we refer to these `Token IDs` as `Input IDs`." +msgstr "如前所述,我们将这些 `Token IDs` 称为 `Input IDs`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:149 +msgid "" +"`Input IDs` = `[T_0_0, T_0_1, T_0_2, T_1_0, T_1_1, T_2_0, T_2_1, T_3_2, " +"T_3_3, T_3_4]`" +msgstr "`Input IDs` = `[T_0_0, T_0_1, T_0_2, T_1_0, T_1_1, T_2_0, T_2_1, T_3_2, T_3_3, T_3_4]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:151 +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:237 +msgid "Build inputs attention metadata" +msgstr "构建输入注意力元数据" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:153 +msgid "" +"In the current **Block Table**, we use the first block (i.e. block_0) to " +"mark the unused block. The shape of the block is `(max num request, max " +"model len / block size)`, where `max model len / block size = 12 / 2 = " +"6`." +msgstr "" +"在当前的**块表**中,我们使用第一个块(即 block_0)来标记未使用的块。块的形状为 `(最大请求数, 最大模型长度 / 块大小)`,其中 `最大模型长度 / 块大小 = 12 / 2 = 6`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:165 +msgid "The KV cache block in the device memory is like:" +msgstr "设备内存中的 KV 缓存块如下所示:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:171 +msgid "" +"Let's say `K = max model len / block size = 6`, and we can get token " +"`device block number`." +msgstr "假设 `K = 最大模型长度 / 块大小 = 6`,我们可以得到令牌的`设备块编号`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:173 +msgid "The workflow of achieving slot mapping:" +msgstr "实现槽映射的工作流程:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:175 +msgid "Get `block table indices` using `K`, `positions` and `request indices`." +msgstr "使用 `K`、`positions` 和 `request indices` 获取`块表索引`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:177 +msgid "" +"Purpose: For each token, it could be used to select `device block number`" +" from `block table`." +msgstr "目的:对于每个令牌,它可用于从`块表`中选择`设备块编号`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:179 +msgid "Get `device block number` using `block table indices`." +msgstr "使用`块表索引`获取`设备块编号`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:181 +msgid "" +"Purpose: `device block number` indicates which device block each token " +"belongs to." +msgstr "目的:`设备块编号`指示每个令牌属于哪个设备块。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:183 +msgid "Get `block offsets` using `positions` and `block size`." +msgstr "使用 `positions` 和 `block size` 获取`块内偏移`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:185 +msgid "" +"Purpose: `block offsets` indicates the offsets of each token within a " +"block." +msgstr "目的:`块内偏移`指示每个令牌在块内的偏移量。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:187 +msgid "construct `slot mapping` using `device block number` and `block offsets`." 
+msgstr "使用`设备块编号`和`块内偏移`构建`槽映射`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:189 +msgid "Purpose: we can use `slot mapping` to store Token IDs into token slots." +msgstr "目的:我们可以使用`槽映射`将令牌 ID 存储到令牌槽中。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:191 +msgid "Details:" +msgstr "详细信息:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:193 +msgid "" +"(**Token level**) Use a simple formula to calculate `block table " +"indices`: `request indices * K + positions / block size`. So it equals " +"`[0 * 6 + 0 / 2, 0 * 6 + 1 / 2, 0 * 6 + 2 / 2, 1 * 6 + 0 / 2, 1 * 6 + 1 /" +" 2, 2 * 6 + 0 / 2, 2 * 6 + 1 / 2, 2 * 6 + 2 / 2, 2 * 6 + 3 / 2, 2 * 6 + 4" +" / 2] = [0, 0, 1, 6, 6, 12, 12, 13, 13, 14]`. This could be used to " +"select `device block number` from `block table`." +msgstr "" +"(**令牌级别**) 使用一个简单的公式计算`块表索引`:`request indices * K + positions / block size`。因此它等于 `[0 * 6 + 0 / 2, 0 * 6 + 1 / 2, 0 * 6 + 2 / 2, 1 * 6 + 0 / 2, 1 * 6 + 1 / 2, 2 * 6 + 0 / 2, 2 * 6 + 1 / 2, 2 * 6 + 2 / 2, 2 * 6 + 3 / 2, 2 * 6 + 4 / 2] = [0, 0, 1, 6, 6, 12, 12, 13, 13, 14]`。这可用于从`块表`中选择`设备块编号`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:194 +msgid "" +"(**Token level**) Use `block table indices` to select out `device block " +"number` for each scheduled token. The pseudocode is `block_numbers = " +"block_table[block_table_indices]`. So `device block number=[1, 1, 2, 3, " +"3, 4, 4, 5, 5, 6]`" +msgstr "" +"(**令牌级别**) 使用`块表索引`为每个已调度的令牌选择出`设备块编号`。伪代码为 `block_numbers = block_table[block_table_indices]`。因此 `设备块编号=[1, 1, 2, 3, 3, 4, 4, 5, 5, 6]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:195 +msgid "" +"(**Token level**) `block offsets` could be computed by `block offsets = " +"positions % block size = [0, 1, 0, 0, 1, 0, 1, 0, 1, 0]`." +msgstr "" +"(**令牌级别**) `块内偏移`可以通过 `block offsets = positions % block size = [0, 1, 0, 0, 1, 0, 1, 0, 1, 0]` 计算得出。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:196 +msgid "" +"Finally, use `block offsets` and `device block number` to create `slot " +"mapping`: `device block number * block size + block_offsets = [2, 3, 4, " +"6, 7, 8, 9, 10, 11, 12]`" +msgstr "" +"最后,使用`块内偏移`和`设备块编号`创建`槽映射`:`设备块编号 * 块大小 + 块内偏移 = [2, 3, 4, 6, 7, 8, 9, 10, 11, 12]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:198 +msgid "(**Request level**) As we know the scheduled token count is `[3, 2, 5]`:" +msgstr "(**请求级别**) 已知已调度的令牌数量为 `[3, 2, 5]`:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:200 +msgid "" +"(**Request level**) Use prefix sum to calculate `query start location`: " +"`[0, 3, 5, 10]`." +msgstr "(**请求级别**) 使用前缀和计算`查询起始位置`:`[0, 3, 5, 10]`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:201 +msgid "" +"(**Request level**) All tokens in step 1 are in the prefill stage, and " +"the computed tokens count is 0; then `sequence length` = `[3, 2, 5]`." +msgstr "(**请求级别**) 步骤 1 中的所有令牌都处于预填充阶段,已计算的令牌数量为 0;因此 `序列长度` = `[3, 2, 5]`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:202 +msgid "" +"(**Request level**) As mentioned above, `number of computed tokens` are " +"all 0s: `[0, 0, 0]`." 
+msgstr "(**请求级别**) 如上所述,`已计算令牌数`均为 0:`[0, 0, 0]`。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:203 +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:272 +msgid "`number of requests`: `3`" +msgstr "`请求数量`:`3`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:204 +msgid "(**Request level**) `number of tokens`: `[3, 2, 5]`" +msgstr "(**请求级别**) `令牌数量`:`[3, 2, 5]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:205 +msgid "`max query len`: `5`" +msgstr "`最大查询长度`:`5`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:206 +msgid "(**Token level**) `slot mapping`: `[2, 3, 4, 6, 7, 8, 9, 10, 11, 12]`" +msgstr "(**令牌级别**) `槽映射`:`[2, 3, 4, 6, 7, 8, 9, 10, 11, 12]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:207 +msgid "" +"`attention mask`: For all requests that initiate a prefill process, we " +"simply create only one mask matrix for reuse across different requests. " +"The shape of this mask matrix is `5 * 5`:" +msgstr "`注意力掩码`:对于所有发起预填充过程的请求,我们仅创建一个掩码矩阵,以便在不同请求间复用。该掩码矩阵的形状为 `5 * 5`:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:209 +msgid "Step 2: Chunked prefill" +msgstr "步骤 2:分块预填充" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:211 +msgid "" +"In Step 2, we no longer provide explanations or perform calculations; " +"instead, we directly present the final result." +msgstr "在步骤 2 中,我们不再提供解释或进行计算;而是直接呈现最终结果。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:215 +#, python-brace-format +msgid "Scheduled token of each request: `{'0': 1, '1': 1, '2': 3}`" +msgstr "每个请求的已调度令牌:`{'0': 1, '1': 1, '2': 3}`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:217 +msgid "`request indices`: `[0, 1, 2, 2, 2]`" +msgstr "`请求索引`:`[0, 1, 2, 2, 2]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:218 +msgid "`token positions`: `[3, 2, 5, 6, 7]`" +msgstr "`令牌位置`:`[3, 2, 5, 6, 7]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:220 +msgid "Current **Token IDs table**:" +msgstr "当前**令牌 ID 表**:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:232 +msgid "" +"**Note**: **T_0_3**, **T_1_2** are new Token IDs of **request_0** and " +"**request_1** respectively. They are sampled from the output of the " +"model." +msgstr "**注意**:**T_0_3**、**T_1_2** 分别是 **request_0** 和 **request_1** 的新令牌 ID。它们是从模型输出中采样得到的。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:234 +msgid "`token indices`: `[3, 14, 29, 30, 31]`" +msgstr "`令牌索引`:`[3, 14, 29, 30, 31]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:235 +msgid "`Input IDs`: `[T_0_3, T_1_2, T_3_5, T_3_6, T_3_7]`" +msgstr "`输入 ID`:`[T_0_3, T_1_2, T_3_5, T_3_6, T_3_7]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:239 +msgid "" +"We allocate the blocks `7` and `8` to `request_1` and `request_2` " +"respectively, as they need more space in device to store KV cache " +"following token generation or chunked prefill." 
+msgstr "我们将块 `7` 和 `8` 分别分配给 `request_1` 和 `request_2`,因为它们在令牌生成或分块预填充后需要更多设备空间来存储 KV 缓存。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:241 +msgid "Current **Block Table**:" +msgstr "当前**块表**:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:253 +msgid "KV cache block in the device memory:" +msgstr "设备内存中的 KV 缓存块:" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:259 +msgid "(**Token level**) `block table indices`: `[1, 7, 14, 15, 15]`" +msgstr "(**令牌级别**) `块表索引`:`[1, 7, 14, 15, 15]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:260 +msgid "(**Token level**) `device block number`: `[2, 7, 6, 8, 8]`" +msgstr "(**令牌级别**) `设备块编号`:`[2, 7, 6, 8, 8]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:261 +msgid "(**Token level**) `block offsets`: `[1, 0, 1, 0, 1]`" +msgstr "(**令牌级别**) `块内偏移`:`[1, 0, 1, 0, 1]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:262 +msgid "(**Token level**) `slot mapping`: `[5, 14, 13, 16, 17]`" +msgstr "(**令牌级别**) `槽映射`:`[5, 14, 13, 16, 17]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:264 +msgid "Scheduled token count: `[1, 1, 3]`" +msgstr "已调度令牌数量:`[1, 1, 3]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:266 +msgid "`query start location`: `[0, 1, 2, 5]`" +msgstr "`查询起始位置`:`[0, 1, 2, 5]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:268 +msgid "`sequence length`: `[4, 3, 8]`" +msgstr "`序列长度`:`[4, 3, 8]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:270 +msgid "`number of computed tokens`: `[3, 2, 5]`" +msgstr "`已计算令牌数`:`[3, 2, 5]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:274 +msgid "`max query len`: `3`" +msgstr "`最大查询长度`:`3`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:276 +msgid "`slot mapping`: `[5, 14, 13, 16, 17]`" +msgstr "`槽映射`:`[5, 14, 13, 16, 17]`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:278 +msgid "`attention mask`: `5 * 8`" +msgstr "`注意力掩码`:`5 * 8`" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:280 +msgid "Each token has a `1 * 8` vector, and there are 5 scheduled tokens." +msgstr "每个令牌有一个 `1 * 8` 的向量,共有 5 个已调度的令牌。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:282 +msgid "At last" +msgstr "最后" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:284 +msgid "" +"If you understand step 1 and step 2, you will know all the following " +"steps." +msgstr "如果您理解了步骤 1 和步骤 2,您就会知道所有后续步骤。" + +#: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:286 +msgid "" +"Hope this document helps you better understand how vLLM prepares inputs " +"for model forwarding. If you have any good ideas, you are welcome to " +"contribute to us." 
+msgstr "希望本文档能帮助您更好地理解 vLLM 如何为模型前向传播准备输入。如果您有任何好的想法,欢迎向我们贡献。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/add_custom_aclnn_op.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/add_custom_aclnn_op.po new file mode 100644 index 00000000..8770b6ce --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/add_custom_aclnn_op.po @@ -0,0 +1,84 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:1 +msgid "Adding a custom aclnn operation" +msgstr "添加自定义 aclnn 算子" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:3 +msgid "" +"This document describes how to add a custom aclnn operation to vllm-" +"ascend." +msgstr "本文档描述了如何向 vllm-ascend 添加自定义 aclnn 算子。" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:5 +msgid "How custom aclnn operation works in vllm-ascend?" +msgstr "自定义 aclnn 算子在 vllm-ascend 中如何工作?" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:7 +msgid "" +"Custom aclnn operations are built and installed into " +"`vllm_ascend/cann_ops_custom` directory during the build process of vllm-" +"ascend. Then the aclnn operators are bound to `torch.ops._C_ascend` " +"module, enabling users to invoke them in vllm-ascend python code." +msgstr "自定义 aclnn 算子在 vllm-ascend 的构建过程中被编译并安装到 `vllm_ascend/cann_ops_custom` 目录。然后,这些 aclnn 算子被绑定到 `torch.ops._C_ascend` 模块,使用户能够在 vllm-ascend 的 Python 代码中调用它们。" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:9 +msgid "To enable custom operations, use the following code:" +msgstr "要启用自定义算子,请使用以下代码:" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:17 +msgid "How to add a custom aclnn operation?" +msgstr "如何添加自定义 aclnn 算子?" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:19 +msgid "Create a new operation folder under `csrc` directory." +msgstr "在 `csrc` 目录下创建一个新的算子文件夹。" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:20 +msgid "" +"Create `op_host` and `op_kernel` directories for host and kernel source " +"code." +msgstr "为宿主端和内核源代码创建 `op_host` 和 `op_kernel` 目录。" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:21 +msgid "" +"Add build options in `csrc/build_aclnn.sh` for supported SOC. Note that " +"multiple ops should be separated with `;`, i.e. `CUSTOM_OPS=op1;op2;op3`." +msgstr "在 `csrc/build_aclnn.sh` 中为支持的 SOC 添加构建选项。注意多个算子应用 `;` 分隔,例如 `CUSTOM_OPS=op1;op2;op3`。" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:22 +msgid "" +"Bind aclnn operators to torch.ops._C_ascend module in " +"`csrc/torch_binding.cpp`." 
+msgstr "在 `csrc/torch_binding.cpp` 中将 aclnn 算子绑定到 torch.ops._C_ascend 模块。" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:23 +msgid "" +"Write a meta implementation in `csrc/torch_binding_meta.cpp` for the op " +"to be captured into the aclgraph." +msgstr "在 `csrc/torch_binding_meta.cpp` 中为算子编写一个元实现,以便其能被捕获到 aclgraph 中。" + +#: ../../source/developer_guide/Design_Documents/add_custom_aclnn_op.md:25 +msgid "" +"After a successful build of vllm-ascend, the custom aclnn operation can " +"be invoked in python code." +msgstr "成功构建 vllm-ascend 后,即可在 Python 代码中调用自定义的 aclnn 算子。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/context_parallel.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/context_parallel.po new file mode 100644 index 00000000..99f2f726 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/context_parallel.po @@ -0,0 +1,391 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:1 +msgid "Context Parallel (CP)" +msgstr "上下文并行 (CP)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:3 +msgid "" +"TL;DR PCP accelerates prefill via sequence splitting. DCP eliminates KV " +"cache redundancy." +msgstr "TL;DR PCP 通过序列分割加速预填充。DCP 消除 KV 缓存冗余。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:5 +msgid "![ContextParallel](../../assets/cp/overview.png)" +msgstr "![ContextParallel](../../assets/cp/overview.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:5 +msgid "ContextParallel" +msgstr "ContextParallel" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:7 +msgid "" +"For the main discussions during the development process, please refer to " +"the [RFC](https://github.com/vllm-project/vllm/issues/25749) and the " +"relevant links referenced by or referencing this RFC." +msgstr "关于开发过程中的主要讨论,请参阅 [RFC](https://github.com/vllm-project/vllm/issues/25749) 以及该 RFC 引用或被引用的相关链接。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:9 +msgid "What is CP?" +msgstr "什么是 CP?" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:11 +msgid "" +"**Context Parallel (CP)** is a strategy for parallelizing computation " +"along the sequence dimension across multiple devices." +msgstr "**上下文并行 (CP)** 是一种沿序列维度在多个设备间并行计算的策略。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:13 +msgid "" +"**Prefill Context Parallel (PCP)** expands the world size of devices and " +"uses dedicated communication domains. Its primary goal is to partition " +"the sequence dimension during the prefill phase, enabling different " +"devices to compute distinct chunks of the sequence simultaneously. The KV" +" cache is sharded along the sequence dimension across devices. 
This " +"approach impacts the computational logic of both the Prefill and Decode " +"stages to varying degrees." +msgstr "**预填充上下文并行 (PCP)** 扩展了设备的世界大小并使用专用的通信域。其主要目标是在预填充阶段对序列维度进行分区,使不同设备能同时计算序列的不同分块。KV 缓存沿序列维度跨设备分片。此方法在不同程度上影响了预填充和解码阶段的计算逻辑。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:18 +msgid "" +"**Decode Context Parallel (DCP)** reuses the communication domain of " +"Tensor Parallelism (TP) and does not require additional devices. Its main" +" objective is to eliminate duplicated storage of the KV cache by sharding" +" it along the sequence dimension across devices within the TP domain that" +" would otherwise hold redundant copies. DCP primarily influences the " +"Decode logic, as well as the logic for chunked prefill and cached " +"prefill." +msgstr "**解码上下文并行 (DCP)** 复用张量并行 (TP) 的通信域,且不需要额外的设备。其主要目标是通过在 TP 域内沿序列维度对 KV 缓存进行分片,消除原本会存储冗余副本的设备间的重复存储。DCP 主要影响解码逻辑,以及分块预填充和缓存预填充的逻辑。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:22 +msgid "How to Use CP?" +msgstr "如何使用 CP?" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:24 +msgid "" +"Please refer to the [context parallel user " +"guide](../../user_guide/feature_guide/context_parallel.md) for detailed " +"information." +msgstr "详细信息请参阅 [上下文并行用户指南](../../user_guide/feature_guide/context_parallel.md)。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:26 +msgid "How It Works?" +msgstr "工作原理" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:28 +msgid "Device Distribution" +msgstr "设备分布" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:30 +msgid "" +"We introduce new communication domains for PCP and reuse TP for DCP, and " +"this is the new layout of devices for PCP2, DCP2, and TP4. " +"![device_world](../../assets/cp/device_world.png)" +msgstr "我们为 PCP 引入了新的通信域,并为 DCP 复用了 TP 的通信域,这是 PCP2、DCP2 和 TP4 的新设备布局。![device_world](../../assets/cp/device_world.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:30 +msgid "device_world" +msgstr "device_world" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:33 +msgid "Block Table" +msgstr "块表" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:35 +msgid "" +"CP performs sequence sharding on the KV cache storage. To facilitate " +"efficient storage and access, tokens are stored in an interleaved manner " +"across devices, with the interleaving granularity determined by " +"`cp_kv_cache_interleave_size`, whose default value is " +"`cp_kv_cache_interleave_size=1`, a.k.a. 'token interleave'." +msgstr "CP 对 KV 缓存存储执行序列分片。为了便于高效存储和访问,令牌以交错方式跨设备存储,交错粒度由 `cp_kv_cache_interleave_size` 决定,其默认值为 `cp_kv_cache_interleave_size=1`,也称为“令牌交错”。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:37 +msgid "" +"Given that PCP and DCP behave similarly for KV cache sharding, we refer " +"to them collectively as CP. Specifically, `cp_size = pcp_size * " +"dcp_size`, and `cp_rank = pcp_rank * dcp_size + dcp_rank`." +msgstr "鉴于 PCP 和 DCP 在 KV 缓存分片方面的行为相似,我们将它们统称为 CP。具体来说,`cp_size = pcp_size * dcp_size`,且 `cp_rank = pcp_rank * dcp_size + dcp_rank`。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:39 +msgid "" +"As illustrated, a virtual block is defined in the block table, where " +"blocks within the same CP device group form a virtual block. The virtual " +"block size is `virtual_block_size = block_size * cp_size`." 
+msgstr "如图所示,块表中定义了一个虚拟块,同一 CP 设备组内的块构成一个虚拟块。虚拟块大小为 `virtual_block_size = block_size * cp_size`。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:41 +#, python-format +msgid "" +"For any token `x`, referencing the following figure, its (virtual) block " +"index is `x // virtual_block_size`, and the offset within the virtual " +"block is `offset_within_virtual_block = x % virtual_block_size`. The " +"local block index is `local_block_index = offset_within_virtual_block // " +"cp_kv_cache_interleave_size`, and the device number is `target_rank = " +"local_block_index % cp_size`. The offset within the local block is " +"`(local_block_index // cp_size) * cp_kv_cache_interleave_size + " +"offset_within_virtual_block % cp_kv_cache_interleave_size`." +msgstr "对于任意令牌 `x`,参考下图,其(虚拟)块索引为 `x // virtual_block_size`,在虚拟块内的偏移量为 `offset_within_virtual_block = x % virtual_block_size`。本地块索引为 `local_block_index = offset_within_virtual_block // cp_kv_cache_interleave_size`,设备号为 `target_rank = local_block_index % cp_size`。在本地块内的偏移量为 `(local_block_index // cp_size) * cp_kv_cache_interleave_size + offset_within_virtual_block % cp_kv_cache_interleave_size`。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:45 +msgid "![BlockTable](../../assets/cp/blocktable.png)" +msgstr "![BlockTable](../../assets/cp/blocktable.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:45 +msgid "BlockTable" +msgstr "BlockTable" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:47 +msgid "" +"Based on the logic above, the `slot_mapping` calculation process is " +"adjusted, and the `slot_mapping` values on each device are modified to " +"ensure the KV cache is sharded along the sequence dimension and stored " +"across different devices as expected." +msgstr "基于上述逻辑,调整了 `slot_mapping` 的计算过程,并修改了每个设备上的 `slot_mapping` 值,以确保 KV 缓存沿序列维度分片并按预期存储在不同设备上。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:49 +#, python-format +msgid "" +"The current implementation requires that `block_size % " +"cp_kv_cache_interleave_size == 0`." +msgstr "当前实现要求 `block_size % cp_kv_cache_interleave_size == 0`。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:51 +msgid "Decode Context Parallel (DCP)" +msgstr "解码上下文并行 (DCP)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:53 +msgid "" +"As mentioned above, the primary function of DCP is to shard the KV cache " +"along the sequence dimension for storage. Its impact lies in the logic of" +" the decode and chunked prefill phases." +msgstr "如上所述,DCP 的主要功能是沿序列维度对 KV 缓存进行分片存储。其影响在于解码和分块预填充阶段的逻辑。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:55 +msgid "" +"**Prefill Phase:** As illustrated, during the Chunked Prefill " +"computation, two distinct logic implementations are employed for MLA and " +"GQA backends." +msgstr "**预填充阶段:** 如图所示,在分块预填充计算期间,MLA 和 GQA 后端采用了两种不同的逻辑实现。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:58 +msgid "" +"In the **MLA backend**, a Context KV Cache `all_gather` operation is " +"performed to aggregate the full KV values. These are then used for " +"attention computation with the Q values of the current chunk. Note that " +"in multi-request scenarios, the directly gathered KV results are " +"interleaved across requests. The `reorg_kvcache` function is used to " +"reorganize the KV cache, ensuring that the KV cache of the same request " +"is stored contiguously." 
+msgstr "在 **MLA 后端** 中,执行上下文 KV 缓存 `all_gather` 操作以聚合完整的 KV 值。然后这些值与当前分块的 Q 值一起用于注意力计算。请注意,在多请求场景中,直接收集的 KV 结果在请求间是交错的。使用 `reorg_kvcache` 函数来重新组织 KV 缓存,确保同一请求的 KV 缓存被连续存储。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:63 +msgid "" +"In the **GQA backend**, an `all_gather` is performed along the head " +"dimension for Q. This is because DCP overlaps with the TP communication " +"domain, and the Q heads within a DCP group differ. However, they need to " +"exchange results with the locally computed KV cache for online Softmax " +"updates. To ensure correctness during result updates, the Q values are " +"synchronized across the DCP group via head-dimension `all_gather`. During" +" the result update process, `cp_lse_ag_out_rs` is invoked to aggregate " +"`attn_output` and `attn_lse`, update the results, and perform a reduce-" +"scatter operation on the outputs. Alternatively, we can use an all-to-all" +" communication to exchange the output and LSE results, followed by direct" +" local updates. This approach aligns with the logic adapted for PCP " +"compatibility." +msgstr "在 **GQA 后端** 中,沿头维度对 Q 执行 `all_gather`。这是因为 DCP 与 TP 通信域重叠,且 DCP 组内的 Q 头不同。然而,它们需要与本地计算的 KV 缓存交换结果以进行在线 Softmax 更新。为确保结果更新过程中的正确性,Q 值通过头维度的 `all_gather` 在 DCP 组内同步。在结果更新过程中,调用 `cp_lse_ag_out_rs` 来聚合 `attn_output` 和 `attn_lse`,更新结果,并对输出执行 reduce-scatter 操作。或者,我们可以使用 all-to-all 通信来交换输出和 LSE 结果,然后直接进行本地更新。这种方法与为 PCP 兼容性而调整的逻辑一致。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:70 +msgid "![DCP-Prefill](../../assets/cp/dcp-prefill.png)" +msgstr "![DCP-Prefill](../../assets/cp/dcp-prefill.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:70 +msgid "DCP-Prefill" +msgstr "DCP-Prefill" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:72 +msgid "" +"**Decode Phase:** The logic during the decode phase is consistent with " +"that of GQA's chunked prefill: an all-gather operation is first performed" +" along the Q head dimension to ensure consistency within the DCP group. " +"After computing the results with the local KV cache, the results are " +"updated via the `cp_lse_ag_out_rs` function." +msgstr "**解码阶段:** 解码阶段的逻辑与 GQA 的分块预填充一致:首先沿 Q 头维度执行 all-gather 操作以确保 DCP 组内的一致性。使用本地 KV 缓存计算结果后,通过 `cp_lse_ag_out_rs` 函数更新结果。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:76 +msgid "![DCP-Decode](../../assets/cp/dcp-decode.png)" +msgstr "![DCP-Decode](../../assets/cp/dcp-decode.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:76 +msgid "DCP-Decode" +msgstr "DCP-Decode" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:78 +msgid "Prefill Context Parallel (PCP)" +msgstr "预填充上下文并行 (PCP)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:80 +msgid "**Tokens Partition in Head-Tail Style**" +msgstr "**头尾式令牌分区**" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:82 +msgid "" +"PCP requires splitting the input sequence and ensuring balanced " +"computational load across devices during the prefill phase. We employ a " +"head-tail style for splitting and concatenation: specifically, the " +"sequence is first padded to a length of `2*pcp_size`, then divided into " +"`2*pcp_size` equal parts. The first part is merged with the last part, " +"the second part with the second last part, and so on, thereby assigning " +"computationally balanced chunks to each device. 
Additionally, since " +"allgather aggregation of KV or Q results in interleaved chunks from " +"different requests, we compute `pcp_allgather_restore_idx` to quickly " +"restore the original order." +msgstr "PCP 需要在预填充阶段分割输入序列并确保跨设备的计算负载均衡。我们采用头尾式进行分割和连接:具体来说,首先将序列填充到长度为 `2*pcp_size`,然后分成 `2*pcp_size` 个相等的部分。第一部分与最后一部分合并,第二部分与倒数第二部分合并,依此类推,从而为每个设备分配计算上均衡的分块。此外,由于 KV 或 Q 的 allgather 聚合会导致来自不同请求的交错分块,我们计算 `pcp_allgather_restore_idx` 以快速恢复原始顺序。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:87 +msgid "These logics are implemented in the function `_update_tokens_for_pcp`." +msgstr "这些逻辑在函数 `_update_tokens_for_pcp` 中实现。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:89 +msgid "![PCP-Partition](../../assets/cp/head-tail-style.png)" +msgstr "![PCP-Partition](../../assets/cp/head-tail-style.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:89 +msgid "PCP-Partition" +msgstr "PCP-Partition" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:91 +msgid "**Prefill Phase:**" +msgstr "**预填充阶段:**" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:93 +msgid "" +"During the Prefill phase (excluding chunked prefill), we employ an all-" +"gather KV approach to address the issue of incomplete sequences on " +"individual GPUs. It is important to note that we only aggregate the KV " +"values for the current layer at a time, and these are discarded " +"immediately after use, avoiding excessive peak memory usage. This method " +"can also be directly applied to KV cache storage (since the KV cache " +"partitioning method differs from PCP sequence partitioning, it is " +"inevitable that each GPU requires a complete copy of the KV values). All " +"attention backends maintain consistency in this logic." +msgstr "在预填充阶段(不包括分块预填充),我们采用 all-gather KV 的方法来解决单个 GPU 上序列不完整的问题。需要注意的是,我们一次只聚合当前层的 KV 值,并且在使用后立即丢弃,以避免过高的峰值内存使用。此方法也可直接应用于 KV 缓存存储(由于 KV 缓存的分区方法与 PCP 序列分区不同,每个 GPU 都需要一份完整的 KV 值副本是不可避免的)。所有注意力后端在此逻辑上保持一致。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:98 +msgid "" +"Note: While a Ring Attention approach could also facilitate information " +"exchange with lower peak memory and enable computation-communication " +"overlap, we prioritized the all-gather KV implementation after evaluating" +" that the development complexity was high and the benefits of overlap " +"were limited." +msgstr "注意:虽然环形注意力方法也能以更低的峰值内存促进信息交换并实现计算-通信重叠,但在评估了开发复杂度高且重叠收益有限后,我们优先实现了 all-gather KV 方案。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:100 +msgid "![PCP-Prefill](../../assets/cp/pcp-prefill.png)" +msgstr "![PCP-Prefill](../../assets/cp/pcp-prefill.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:100 +msgid "PCP-Prefill" +msgstr "PCP-Prefill" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:102 +msgid "**Decode Phase:**" +msgstr "**解码阶段:**" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:104 +msgid "" +"During the decode phase, we only need to add an allgather within the PCP " +"group after the DCP all-to-all communication exchanges the output and " +"LSE, before proceeding with the output update." 
+msgstr "在解码阶段,我们只需要在 DCP all-to-all 通信交换输出和 LSE 之后,于 PCP 组内添加一个 allgather,然后再进行输出更新。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:106 +msgid "![PCP-Decode](../../assets/cp/pcp-decode.png)" +msgstr "![PCP-Decode](../../assets/cp/pcp-decode.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:106 +msgid "PCP-Decode" +msgstr "PCP-Decode" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:108 +msgid "**Chunked Prefill:**" +msgstr "**分块预填充:**" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:110 +msgid "" +"Currently, there are three viable approaches for Chunked Prefill " +"compatibility: **AllGatherQ**, **AllGatherKV**, and **Ring-Attn**. Since " +"PCP performs sequence sharding on both the query sequence and the KV " +"cache, we need to ensure that one side has complete information or employ" +" a method like Ring-Attn to perform computations sequentially. The " +"advantages and disadvantages of Ring-Attn will not be elaborated here." +msgstr "目前,有三种可行的分块预填充兼容性方法:**AllGatherQ**、**AllGatherKV** 和 **Ring-Attn**。由于 PCP 对查询序列和 KV 缓存都执行序列分片,我们需要确保其中一方拥有完整信息,或者采用类似 Ring-Attn 的方法顺序执行计算。Ring-Attn 的优缺点在此不赘述。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:114 +msgid "" +"We have implemented the **AllGatherQ** approach in the GQA attention " +"backend and the **AllGatherKV** approach in the MLA attention backend. " +"The workflow after **AllGatherQ** is identical to the decode phase, while" +" the workflow after **AllGatherKV** is the same as the standard prefill " +"phase. For details, please refer to the diagram below; specific steps " +"will not be repeated." +msgstr "我们已在 GQA 注意力后端实现了 **AllGatherQ** 方法,并在 MLA 注意力后端实现了 **AllGatherKV** 方法。**AllGatherQ** 之后的工作流与解码阶段相同,而 **AllGatherKV** 之后的工作流与标准预填充阶段相同。详情请参考下图;具体步骤不再赘述。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:118 +msgid "" +"One important note: **AllGatherKV** may lead to significant peak memory " +"usage when the context length becomes excessively long. To mitigate this," +" we adopt a segmented processing strategy. By predefining the maximum " +"amount of KV cache processed per round, we sequentially complete the " +"attention computation and online softmax updates for each segment." 
+msgstr "" +"一个重要注意事项:当上下文长度变得过长时,**AllGatherKV** 可能导致显著的峰值内存使用。为了缓解这个问题,我们采用了分段处理策略。通过预定义每轮处理的 KV 缓存最大量,我们依次完成每个分段的注意力计算和在线 softmax 更新。" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:122 +msgid "![PCP-ChunkedPrefill](../../assets/cp/chunkedprefill.png)" +msgstr "![PCP-ChunkedPrefill](../../assets/cp/chunkedprefill.png)" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:122 +msgid "PCP-ChunkedPrefill" +msgstr "PCP-ChunkedPrefill" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:124 +msgid "Related Files" +msgstr "相关文件" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:126 +msgid "slot_mapping computation: `vllm_ascend/worker/block_table.py`" +msgstr "slot_mapping 计算:`vllm_ascend/worker/block_table.py`" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:127 +msgid "" +"sequences splitting and metadata prepare: " +"`vllm_ascend/worker/model_runner_v1.py`" +msgstr "序列拆分与元数据准备:`vllm_ascend/worker/model_runner_v1.py`" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:128 +msgid "GQA backend: `vllm_ascend/attention/attention_cp.py`" +msgstr "GQA 后端:`vllm_ascend/attention/attention_cp.py`" + +#: ../../source/developer_guide/Design_Documents/context_parallel.md:129 +msgid "MLA backend: `vllm_ascend/attention/mla_cp.py`" +msgstr "MLA 后端:`vllm_ascend/attention/mla_cp.py`" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po new file mode 100644 index 00000000..98a91297 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po @@ -0,0 +1,814 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:1 +msgid "CPU Binding" +msgstr "CPU 绑定" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:5 +msgid "" +"CPU binding pins vLLM Ascend worker processes and key threads to specific" +" CPU cores to reduce CPU–NPU cross‑NUMA traffic and stabilize latency " +"under multi‑process workloads. It is designed for ARM servers running " +"Ascend NPUs and is automatically executed during worker initialization " +"when enabled." +msgstr "" +"CPU 绑定将 vLLM Ascend 工作进程和关键线程固定到特定的 CPU 核心,以减少 CPU-" +"NPU 跨 NUMA 流量,并在多进程工作负载下稳定延迟。它专为运行 Ascend NPU 的 ARM " +"服务器设计,启用后会在工作进程初始化期间自动执行。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:7 +msgid "Background" +msgstr "背景" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:9 +msgid "" +"On multi‑socket ARM systems, the OS scheduler may place vLLM threads on " +"CPUs far from the local NPU, causing NUMA cross‑traffic and jitter. 
CPU " +"binding enforces a deterministic CPU placement strategy and optionally " +"binds NPU IRQs to the same CPU pool. This is distinct from other " +"performance features (e.g., graph mode or dynamic batch) because it is " +"purely a host‑side affinity policy and does not change model execution " +"logic." +msgstr "" +"在多插槽 ARM 系统上,操作系统调度器可能会将 vLLM 线程放置在远离本地 NPU 的 " +"CPU 上,从而导致 NUMA 跨域流量和延迟抖动。CPU 绑定强制执行一种确定性的 CPU " +"放置策略,并可选地将 NPU IRQ 绑定到同一个 CPU 池。这与其他性能特性(如图模式" +"或动态批处理)不同,因为它纯粹是主机端的亲和性策略,不改变模型执行逻辑。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:11 +msgid "Design & How it works" +msgstr "设计与工作原理" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:13 +msgid "Key concepts" +msgstr "关键概念" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:15 +msgid "" +"**Allowed CPU list**: The cpuset from /proc/self/status " +"(Cpus_allowed_list). All allocations are constrained to this list." +msgstr "" +"**允许的 CPU 列表**:来自 /proc/self/status (Cpus_allowed_list) 的 cpuset。" +"所有分配都受限于此列表。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:16 +msgid "" +"**Running NPU list**: Logical NPU IDs extracted from npu‑smi process " +"listing, optionally filtered by ASCEND_RT_VISIBLE_DEVICES." +msgstr "" +"**运行中的 NPU 列表**:从 npu-smi 进程列表中提取的逻辑 NPU ID,可选地由 " +"ASCEND_RT_VISIBLE_DEVICES 过滤。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:17 +msgid "" +"**CPU pool per NPU**: The CPU list assigned to each logical NPU ID based " +"on the binding mode." +msgstr "" +"**每个 NPU 的 CPU 池**:根据绑定模式分配给每个逻辑 NPU ID 的 CPU 列表。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:18 +msgid "**Binding modes & Device behavior**:" +msgstr "**绑定模式与设备行为**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "Device type" +msgstr "设备类型" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "Default mode" +msgstr "默认模式" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "Description" +msgstr "描述" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "A3 (No Affinity)" +msgstr "A3 (无亲和性)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`global_slice`" +msgstr "`global_slice`" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "" +"Splits the allowed CPU list evenly based on the **total number of global " +"logical NPUs**, ensuring each NPU is assigned a contiguous segment of CPU" +" cores. This prevents CPU core overlap across multiple process groups." +msgstr "" +"根据**全局逻辑 NPU 总数**均匀分割允许的 CPU 列表,确保每个 NPU 被分配一个连" +"续的 CPU 核心段。这可以防止多个进程组之间的 CPU 核心重叠。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "A2 / 310P / Others" +msgstr "A2 / 310P / 其他" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`topo_affinity`" +msgstr "`topo_affinity`" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "" +"Allocates CPUs based on NPU topology affinity (`npu‑smi info -t topo`). " +"If multiple NPUs are assigned to a single NUMA node (which may cause " +"bandwidth contention), the CPU allocation extends to adjacent NUMA nodes." +msgstr "" +"基于 NPU 拓扑亲和性 (`npu-smi info -t topo`) 分配 CPU。如果多个 NPU 被分配" +"到单个 NUMA 节点(可能导致带宽争用),则 CPU 分配会扩展到相邻的 NUMA 节点。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:25 +msgid "**Default**: enabled (enable_cpu_binding = true)." 
+msgstr "**默认**:启用 (enable_cpu_binding = true)。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:26 +msgid "**Fallback**: If NPU topo affinity is unavailable, global_slice is used." +msgstr "**回退**:如果 NPU 拓扑亲和性不可用,则使用 global_slice。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:27 +msgid "" +"**Failure handling**: Any exception in binding is logged as a warning and" +" **binding is skipped for that rank**." +msgstr "" +"**故障处理**:绑定过程中的任何异常都会记录为警告,并且**跳过该等级的绑定**。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:29 +msgid "Execution flow (simplified)" +msgstr "执行流程(简化版)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:31 +msgid "" +"**Feature entry**: worker initialization calls `bind_cpus(local_rank)` " +"when `enable_cpu_binding` is true." +msgstr "" +"**功能入口**:当 `enable_cpu_binding` 为 true 时,工作进程初始化会调用 " +"`bind_cpus(local_rank)`。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:32 +msgid "" +"**CPU architecture gate**: If the CPU is not ARM, binding is skipped with" +" a log." +msgstr "**CPU 架构门控**:如果 CPU 不是 ARM,则记录日志并跳过绑定。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:33 +msgid "**Collect device info**:" +msgstr "**收集设备信息**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:34 +msgid "Map logical NPU IDs from `npu‑smi info -m`." +msgstr "从 `npu-smi info -m` 映射逻辑 NPU ID。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:35 +msgid "Detect running NPU IDs from npu‑smi info process table." +msgstr "从 npu-smi info 进程表中检测运行中的 NPU ID。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:36 +msgid "Read cpuset from /proc/self/status." +msgstr "从 /proc/self/status 读取 cpuset。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:37 +msgid "Read topo affinity from `npu‑smi info -t topo`." +msgstr "从 `npu-smi info -t topo` 读取拓扑亲和性。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:38 +msgid "**Build CPU pools**:" +msgstr "**构建 CPU 池**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:39 +msgid "Use **global_slice** for A3 devices; **topo_affinity** for A2 and 310P." +msgstr "对 A3 设备使用 **global_slice**;对 A2 和 310P 使用 **topo_affinity**。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:40 +msgid "If topo affinity is missing, fall back to global_slice." +msgstr "如果缺少拓扑亲和性,则回退到 global_slice。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:41 +msgid "Ensure each NPU has at least 5 CPUs." +msgstr "确保每个 NPU 至少有 5 个 CPU。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:42 +msgid "**Allocate per‑role CPUs**:" +msgstr "**分配按角色划分的 CPU**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:43 +msgid "Reserve the first two CPUs for IRQ binding." 
+msgstr "保留前两个 CPU 用于 IRQ 绑定。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:44 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:62 +msgid "`main`: pool[2:-2]" +msgstr "`main`: pool[2:-2]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:45 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:63 +msgid "`acl`: pool[-2]" +msgstr "`acl`: pool[-2]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:46 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:64 +msgid "`release`: pool[-1]" +msgstr "`release`: pool[-1]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:47 +msgid "**Bind threads**:" +msgstr "**绑定线程**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:48 +msgid "Main process is pinned to `main` CPUs." +msgstr "主进程被固定到 `main` CPU。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:49 +msgid "ACL threads (named with acl_thread) are pinned to `acl` CPU." +msgstr "ACL 线程(以 acl_thread 命名)被固定到 `acl` CPU。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:50 +msgid "Release threads (named with release_thread) are pinned to `release` CPU." +msgstr "释放线程(以 release_thread 命名)被固定到 `release` CPU。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:51 +msgid "**Bind NPU IRQs (optional)**:" +msgstr "**绑定 NPU IRQ(可选)**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:52 +msgid "" +"If /proc/irq is writable, bind SQ/CQ IRQs to the first two CPUs in the " +"pool." +msgstr "如果 /proc/irq 可写,则将 SQ/CQ IRQ 绑定到池中的前两个 CPU。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:53 +msgid "irqbalance may be stopped to prevent overrides." +msgstr "可能会停止 irqbalance 以防止覆盖。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:54 +msgid "**Memory binding (optional)**:" +msgstr "**内存绑定(可选)**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:55 +msgid "" +"If migratepages is available, memory for ACL threads is migrated to the " +"NPU’s NUMA node." +msgstr "如果 migratepages 可用,则将 ACL 线程的内存迁移到 NPU 的 NUMA 节点。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:57 +msgid "Allocation plan examples" +msgstr "分配方案示例" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:59 +msgid "" +"The allocation plan is derived directly from the CPU pool per NPU and " +"then split into roles:" +msgstr "分配方案直接来源于每个 NPU 的 CPU 池,然后按角色划分:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:61 +msgid "IRQ CPUs: pool[0], pool[1]" +msgstr "IRQ CPU: pool[0], pool[1]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:66 +msgid "Below are concrete examples that reflect the actual code paths." 
+msgstr "以下是反映实际代码路径的具体示例。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:68 +msgid "Example 1: A3 inference server with 640 CPUs and 16 NPUs" +msgstr "示例 1:具有 640 个 CPU 和 16 个 NPU 的 A3 推理服务器" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:70 +msgid "allowed_cpus = [0..639] (640 CPUs)" +msgstr "allowed_cpus = [0..639] (640 个 CPU)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:71 +msgid "NUMA nodes = 0..7 (8 NUMA nodes, symmetric layout)" +msgstr "NUMA 节点 = 0..7 (8 个 NUMA 节点,对称布局)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:72 +msgid "total_npus = 16" +msgstr "total_npus = 16" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:73 +msgid "running_npu_list = [0..15]" +msgstr "running_npu_list = [0..15]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:74 +msgid "base = 640 // 16 = 40, extra = 0" +msgstr "base = 640 // 16 = 40, extra = 0" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:75 +msgid "Each NPU gets a 40‑CPU pool." +msgstr "每个 NPU 获得一个 40 个 CPU 的池。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "NPU ID" +msgstr "NPU ID" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "Assigned CPU Cores (global_slice)" +msgstr "分配的 CPU 核心 (global_slice)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "Role Division (IRQ/Main/ACL/Release)" +msgstr "角色划分 (IRQ/Main/ACL/Release)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "0" +msgstr "0" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "0-39" +msgstr "0-39" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 0-1, `Main`: 2-37, `ACL`: 38, `Release`: 39" +msgstr "`IRQ`: 0-1, `Main`: 2-37, `ACL`: 38, `Release`: 39" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "1" +msgstr "1" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "40-79" +msgstr "40-79" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 40-41, `Main`: 42-77, `ACL`: 78, `Release`: 79" +msgstr "`IRQ`: 40-41, `Main`: 42-77, `ACL`: 78, `Release`: 79" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "..." +msgstr "..." + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "15" +msgstr "15" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "600-639" +msgstr "600-639" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 600-601, `Main`: 602-637, `ACL`: 638, `Release`: 639" +msgstr "`IRQ`: 600-601, `Main`: 602-637, `ACL`: 638, `Release`: 639" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:84 +msgid "" +"This layout remains deterministic even when multiple processes share the " +"same cpuset, because slicing is based on the global logical NPU ID." 
+msgstr "" +"即使多个进程共享同一个 cpuset,此布局也保持确定性,因为切片是基于全局逻辑 " +"NPU ID 的。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:86 +msgid "Example 2: A3 global_slice, even split" +msgstr "示例 2:A3 global_slice,均匀分割" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:88 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:109 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:142 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:161 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:182 +msgid "**Inputs**:" +msgstr "**输入**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:91 +msgid "" +"NUMA nodes = 0..1 (2 NUMA nodes, symmetric layout; NUMA0 = 0..11, NUMA1 =" +" 12..23)" +msgstr "NUMA 节点 = 0..1 (2个NUMA节点,对称布局;NUMA0 = 0..11, NUMA1 = 12..23)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:92 +msgid "total_npus = 4 (from npu-smi info -m)" +msgstr "total_npus = 4 (来自 npu-smi info -m)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:93 +msgid "running_npu_list = [0, 1, 2, 3]" +msgstr "running_npu_list = [0, 1, 2, 3]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:95 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:116 +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:149 +msgid "**Global slice**:" +msgstr "**全局切片**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:97 +msgid "base = 24 // 4 = 6, extra = 0" +msgstr "base = 24 // 4 = 6, extra = 0" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:98 +msgid "Each NPU gets a 6‑CPU pool." +msgstr "每个NPU获得一个包含6个CPU的池。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "0-5" +msgstr "0-5" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 0-1, `Main`: 2-3, `ACL`: 4, `Release`: 5" +msgstr "`IRQ`: 0-1, `Main`: 2-3, `ACL`: 4, `Release`: 5" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "6-11" +msgstr "6-11" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 6-7, `Main`: 8-9, `ACL`: 10, `Release`: 11" +msgstr "`IRQ`: 6-7, `Main`: 8-9, `ACL`: 10, `Release`: 11" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "2" +msgstr "2" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "12-17" +msgstr "12-17" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 12-13, `Main`: 14-15, `ACL`: 16, `Release`: 17" +msgstr "`IRQ`: 12-13, `Main`: 14-15, `ACL`: 16, `Release`: 17" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "3" +msgstr "3" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "18-23" +msgstr "18-23" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 18-19, `Main`: 20-21, `ACL`: 22, `Release`: 23" +msgstr "`IRQ`: 18-19, `Main`: 20-21, `ACL`: 22, `Release`: 23" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:107 +msgid "Example 3: A3 global_slice, remainder distribution" +msgstr "示例 3: A3 global_slice,余数分配" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:111 +msgid "allowed_cpus = [0..16] (17 CPUs)" +msgstr "allowed_cpus = [0..16] (17个CPU)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:112 +msgid "" +"NUMA nodes = 0..1 (2 NUMA nodes, symmetric layout; NUMA0 = 0..7, NUMA1 = " +"8..16)" +msgstr "NUMA 节点 = 0..1 
(2个NUMA节点,对称布局;NUMA0 = 0..7, NUMA1 = 8..16)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:113 +msgid "total_npus = 3" +msgstr "total_npus = 3" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:114 +msgid "running_npu_list = [0, 1, 2]" +msgstr "running_npu_list = [0, 1, 2]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:118 +msgid "base = 17 // 3 = 5, extra = 2" +msgstr "base = 17 // 3 = 5, extra = 2" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:119 +msgid "NPU0 pool size = 6 (base+1)" +msgstr "NPU0 池大小 = 6 (base+1)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:120 +msgid "NPU1 pool size = 6 (base+1)" +msgstr "NPU1 池大小 = 6 (base+1)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:121 +msgid "NPU2 pool size = 5 (base)" +msgstr "NPU2 池大小 = 5 (base)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "12-16" +msgstr "12-16" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 12-13, `Main`: 14, `ACL`: 15, `Release`: 16" +msgstr "`IRQ`: 12-13, `Main`: 14, `ACL`: 15, `Release`: 16" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:129 +msgid "" +"Note: When a pool size is exactly 5, `main` has a single CPU (pool[2]). " +"If any pool is <5, binding raises an error." +msgstr "注意:当池大小恰好为5时,`main` 只有一个CPU (pool[2])。如果任何池小于5,绑定将引发错误。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:131 +msgid "**NUMA analysis**:" +msgstr "**NUMA 分析**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:133 +msgid "" +"With the symmetric NUMA layout above (NUMA0 = 0..7, NUMA1 = 8..16), NPU0 " +"stays within NUMA0, NPU2 stays within NUMA1, but NPU1 spans both NUMA0 " +"(6,7) and NUMA1 (8..11). This is a direct consequence of global slicing " +"over the ordered cpuset; the remainder distribution does not enforce NUMA" +" boundaries." +msgstr "在上述对称NUMA布局中 (NUMA0 = 0..7, NUMA1 = 8..16),NPU0保持在NUMA0内,NPU2保持在NUMA1内,但NPU1跨越了NUMA0 (6,7) 和 NUMA1 (8..11)。这是对有序cpuset进行全局切片的直接结果;余数分配不强制NUMA边界。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:134 +msgid "" +"If the cpuset numbering is interleaved across NUMA nodes (non‑symmetric " +"layout), cross‑NUMA pools can happen even earlier. This is why symmetric " +"NUMA layout is recommended for best locality." +msgstr "如果cpuset编号在NUMA节点间交错(非对称布局),跨NUMA池可能更早发生。这就是为什么推荐对称NUMA布局以获得最佳局部性。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:136 +msgid "Known limitations and future improvements" +msgstr "已知限制与未来改进" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:138 +msgid "" +"With the current `global_slice` strategy, some CPU/NPU layouts cannot " +"avoid cross‑NUMA pools. A future enhancement should incorporate NUMA node" +" boundaries into the slicing logic so that pools remain within a single " +"NUMA node whenever possible." 
+msgstr "使用当前的 `global_slice` 策略,某些CPU/NPU布局无法避免跨NUMA池。未来的增强应将NUMA节点边界纳入切片逻辑,以便池尽可能保持在单个NUMA节点内。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:140 +msgid "Example 4: global_slice with visible subset of NPUs" +msgstr "示例 4: 使用NPU可见子集的 global_slice" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:144 +msgid "total_npus = 8 (from npu-smi info -m)" +msgstr "total_npus = 8 (来自 npu-smi info -m)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:145 +msgid "running_npu_list = [2, 3] (filtered by ASCEND_RT_VISIBLE_DEVICES)" +msgstr "running_npu_list = [2, 3] (由 ASCEND_RT_VISIBLE_DEVICES 过滤)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:146 +msgid "allowed_cpus = [0..39] (40 CPUs)" +msgstr "allowed_cpus = [0..39] (40个CPU)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:147 +msgid "" +"NUMA nodes = 0..3 (4 NUMA nodes, symmetric layout; 0..9, 10..19, 20..29, " +"30..39)" +msgstr "NUMA 节点 = 0..3 (4个NUMA节点,对称布局;0..9, 10..19, 20..29, 30..39)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:151 +msgid "base = 40 // 8 = 5, extra = 0" +msgstr "base = 40 // 8 = 5, extra = 0" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:152 +msgid "" +"Only the visible logical NPUs get pools, but slicing uses the global NPU " +"ID so different processes do not overlap." +msgstr "只有可见的逻辑NPU获得池,但切片使用全局NPU ID,因此不同进程不会重叠。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "10-14" +msgstr "10-14" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 10-11, `Main`: 12, `ACL`: 13, `Release`: 14" +msgstr "`IRQ`: 10-11, `Main`: 12, `ACL`: 13, `Release`: 14" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "15-19" +msgstr "15-19" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 15-16, `Main`: 17, `ACL`: 18, `Release`: 19" +msgstr "`IRQ`: 15-16, `Main`: 17, `ACL`: 18, `Release`: 19" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:159 +msgid "Example 5: A2/310P topo_affinity with NUMA extension" +msgstr "示例 5: 具有NUMA扩展的 A2/310P topo_affinity" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:163 +#, python-brace-format +msgid "npu_affinity = {0: [0..7], 1: [0..7]} (from `npu-smi info -t topo`)" +msgstr "npu_affinity = {0: [0..7], 1: [0..7]} (来自 `npu-smi info -t topo`)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:164 +msgid "allowed_cpus = [0..15] (16 CPUs)" +msgstr "allowed_cpus = [0..15] (16个CPU)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:165 +msgid "NUMA nodes = 0..1 (2 NUMA nodes; NUMA0 = 0..7, NUMA1 = 8..15)" +msgstr "NUMA 节点 = 0..1 (2个NUMA节点;NUMA0 = 0..7, NUMA1 = 8..15)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:167 +msgid "**NUMA extension**:" +msgstr "**NUMA 扩展**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:169 +msgid "" +"Both NPUs are on NUMA0, so each pool extends to the nearest NUMA node to " +"reduce contention." 
+msgstr "两个NPU都在NUMA0上,因此每个池扩展到最近的NUMA节点以减少争用。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:170 +msgid "NPU0 extends to NUMA1 -> [0..15]" +msgstr "NPU0 扩展到 NUMA1 -> [0..15]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:171 +msgid "NPU1 extends to NUMA1 -> [0..15]" +msgstr "NPU1 扩展到 NUMA1 -> [0..15]" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:173 +msgid "" +"Because both pools are identical, the allocator applies average " +"distribution across NPUs to avoid overlap. With a pool [0..15] and 2 " +"NPUs, the final pools become:" +msgstr "由于两个池相同,分配器应用跨NPU的平均分配以避免重叠。对于池 [0..15] 和 2个NPU,最终池变为:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "Assigned CPU Cores (topo_affinity)" +msgstr "分配的CPU核心 (topo_affinity)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "0-7" +msgstr "0-7" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 0-1, `Main`: 2-5, `ACL`: 6, `Release`: 7" +msgstr "`IRQ`: 0-1, `Main`: 2-5, `ACL`: 6, `Release`: 7" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "8-15" +msgstr "8-15" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "`IRQ`: 8-9, `Main`: 10-13, `ACL`: 14, `Release`: 15" +msgstr "`IRQ`: 8-9, `Main`: 10-13, `ACL`: 14, `Release`: 15" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:180 +msgid "Example 6: Minimum CPUs per NPU" +msgstr "示例 6: 每个NPU的最小CPU数" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:184 +msgid "total_npus = 2" +msgstr "total_npus = 2" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:185 +msgid "allowed_cpus = [0..7] (8 CPUs)" +msgstr "allowed_cpus = [0..7] (8个CPU)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:186 +msgid "" +"NUMA nodes = 0..1 (2 NUMA nodes, symmetric layout; NUMA0 = 0..3, NUMA1 = " +"4..7)" +msgstr "NUMA 节点 = 0..1 (2个NUMA节点,对称布局;NUMA0 = 0..3, NUMA1 = 4..7)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:188 +msgid "**Result**:" +msgstr "**结果**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:190 +msgid "" +"base = 4, which is < 5, so binding fails with: \"Insufficient CPUs for " +"binding with IRQ/ACL/REL reservations...\"" +msgstr "base = 4,小于5,因此绑定失败,错误信息为:\"用于IRQ/ACL/REL预留绑定的CPU不足...\"" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "Assigned CPU Cores" +msgstr "分配的CPU核心" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "N/A" +msgstr "不适用" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md +msgid "Binding error (insufficient CPUs per NPU)" +msgstr "绑定错误(每个NPU的CPU不足)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:197 +msgid "" +"To resolve, either reduce total_npus or enlarge the cpuset so that each " +"NPU has at least 5 CPUs." 
+msgstr "要解决此问题,要么减少 total_npus,要么扩大 cpuset,使每个NPU至少有5个CPU。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:199 +msgid "Logging and verification" +msgstr "日志记录与验证" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:201 +msgid "Logs show the selected binding mode and the allocation plan, for example:" +msgstr "日志显示选定的绑定模式和分配计划,例如:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:202 +msgid "`[cpu_bind_mode] mode=global_slice rank=0 visible_npus=[...]`" +msgstr "`[cpu_bind_mode] mode=global_slice rank=0 visible_npus=[...]`" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:203 +msgid "`The CPU allocation plan is as follows: ...`" +msgstr "`CPU分配计划如下:...`" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:204 +msgid "You can verify affinity via taskset or `/proc//status` after startup." +msgstr "启动后,您可以通过 taskset 或 `/proc//status` 验证亲和性。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:206 +msgid "Limitations & Notes" +msgstr "限制与注意事项" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:208 +msgid "**ARM‑only**: Binding is skipped on non‑ARM CPUs." +msgstr "**仅限ARM**:在非ARM CPU上跳过绑定。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:209 +msgid "" +"**Minimum CPU requirement**: Each logical NPU requires at least 5 CPUs. " +"If the cpuset is smaller, binding fails with an error." +msgstr "**最小CPU要求**:每个逻辑NPU至少需要5个CPU。如果cpuset更小,绑定将失败并报错。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:210 +msgid "" +"**NUMA symmetry assumption**: For best locality, the current strategies " +"assume the cpuset is evenly distributed across NUMA nodes and CPU " +"numbering aligns with NUMA layout; otherwise NUMA locality may be " +"suboptimal." +msgstr "**NUMA对称性假设**:为获得最佳局部性,当前策略假设cpuset在NUMA节点间均匀分布,且CPU编号与NUMA布局对齐;否则NUMA局部性可能不理想。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:211 +msgid "" +"Example (symmetric layout): 2 NUMA nodes, 64 CPUs total. NUMA0 = CPUs " +"0–31, NUMA1 = CPUs 32–63, and the cpuset is 0–63. With 4 logical NPUs, " +"global slicing yields 16 CPUs per NPU (0–15, 16–31, 32–47, 48–63), so " +"each NPU’s pool stays within a single NUMA node." +msgstr "示例(对称布局):2个NUMA节点,总共64个CPU。NUMA0 = CPU 0–31,NUMA1 = CPU 32–63,cpuset为0–63。对于4个逻辑NPU,全局切片每个NPU产生16个CPU (0–15, 16–31, 32–47, 48–63),因此每个NPU的池保持在单个NUMA节点内。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:212 +msgid "**Runtime dependencies**:" +msgstr "**运行时依赖**:" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:213 +msgid "Requires npu‑smi and lscpu commands." +msgstr "需要 npu‑smi 和 lscpu 命令。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:214 +msgid "IRQ binding requires write access to /proc/irq." +msgstr "IRQ绑定需要对 /proc/irq 的写访问权限。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:215 +msgid "Memory binding requires migratepages; otherwise it is skipped." +msgstr "内存绑定需要 migratepages;否则将被跳过。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:216 +msgid "" +"**IRQ side effects**: irqbalance may be stopped to avoid overriding " +"bindings." +msgstr "**IRQ副作用**:可能会停止 irqbalance 以避免覆盖绑定。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:217 +msgid "" +"**Per‑process behavior**: Only the current rank’s NPU is used for IRQ " +"binding to avoid cross‑process overwrite." 
+msgstr "**每进程行为**:仅使用当前 rank 的 NPU 进行 IRQ 绑定,以避免跨进程覆盖。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:219 +msgid "Debug logging" +msgstr "调试日志" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:221 +msgid "" +"Use the standard vLLM logging configuration to enable debug logs. The " +"binding process emits debug messages (e.g., `[cpu_global_slice] ...`) " +"when debug level is enabled." +msgstr "使用标准的 vLLM 日志配置来启用调试日志。当启用调试级别时,绑定过程会发出调试消息(例如 `[cpu_global_slice] ...`)。" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:223 +msgid "References" +msgstr "参考" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:225 +msgid "" +"CPU binding implementation: vllm_ascend/cpu_binding.py (`DeviceInfo`, " +"`CpuAlloc`, `bind_cpus`)" +msgstr "CPU 绑定实现:vllm_ascend/cpu_binding.py (`DeviceInfo`, `CpuAlloc`, `bind_cpus`)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:226 +msgid "" +"Worker integration: vllm_ascend/worker/worker.py " +"(`NPUWorker._init_device`)" +msgstr "Worker 集成:vllm_ascend/worker/worker.py (`NPUWorker._init_device`)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:227 +msgid "" +"Additional config option: " +"docs/source/user_guide/configuration/additional_config.md " +"(`enable_cpu_binding`)" +msgstr "附加配置选项:docs/source/user_guide/configuration/additional_config.md (`enable_cpu_binding`)" + +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:228 +msgid "Tests: tests/ut/device_allocator/test_cpu_binding.py" +msgstr "测试:tests/ut/device_allocator/test_cpu_binding.py" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po new file mode 100644 index 00000000..b7b657a9 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po @@ -0,0 +1,360 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:1 +msgid "Disaggregated-prefill" +msgstr "解耦式预填充" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:3 +msgid "Why disaggregated-prefill?" +msgstr "为何需要解耦式预填充?" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:5 +msgid "" +"This feature addresses the need to optimize the **Time Per Output Token " +"(TPOT)** and **Time To First Token (TTFT)** in large-scale inference " +"tasks. 
The motivation is two-fold:" +msgstr "" +"此功能旨在优化大规模推理任务中的**单输出令牌时间 (TPOT)** 和**首令牌时间 " +"(TTFT)**。其动机主要有两方面:" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:7 +msgid "" +"**Adjusting Parallel Strategy and Instance Count for P and D Nodes** " +"Using the disaggregated-prefill strategy, this feature allows the system " +"to flexibly adjust the parallelization strategy (e.g., data parallelism " +"(dp), tensor parallelism (tp), and expert parallelism (ep)) and the " +"instance count for both P (Prefiller) and D (Decoder) nodes. This leads " +"to better system performance tuning, particularly for **TTFT** and " +"**TPOT**." +msgstr "" +"**调整 P 节点和 D 节点的并行策略与实例数量** 采用解耦式预填充策略,此功能允许系统灵活调整 P(预填充器)节点和 D(解码器)节点的并行化策略(例如数据并行 (dp)、张量并行 (tp) 和专家并行 (ep))以及实例数量。这有助于实现更好的系统性能调优,特别是针对 **TTFT** 和 **TPOT**。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:10 +msgid "" +"**Optimizing TPOT** Without the disaggregated-prefill strategy, prefill " +"tasks are inserted during decoding, which results in inefficiencies and " +"delays. Disaggregated-prefill solves this by allowing for better control " +"over the system’s **TPOT**. By managing chunked prefill tasks " +"effectively, the system avoids the challenge of determining the optimal " +"chunk size and provides more reliable control over the time taken for " +"generating output tokens." +msgstr "" +"**优化 TPOT** 在没有解耦式预填充策略的情况下,预填充任务会在解码过程中插入,导致效率低下和延迟。解耦式预填充通过允许更好地控制系统 **TPOT** 来解决此问题。通过有效管理分块的预填充任务,系统避免了确定最佳分块大小的挑战,并对生成输出令牌所需时间提供了更可靠的控制。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:15 +msgid "Usage" +msgstr "使用方法" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:17 +msgid "" +"vLLM Ascend currently supports two types of connectors for handling KV " +"cache management:" +msgstr "vLLM Ascend 目前支持两种用于处理 KV 缓存管理的连接器:" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:19 +msgid "**MooncakeConnector**: D nodes pull KV cache from P nodes." +msgstr "**MooncakeConnector**:D 节点从 P 节点拉取 KV 缓存。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:20 +msgid "" +"**MooncakeLayerwiseConnector**: P nodes push KV cache to D nodes in a " +"layered manner." +msgstr "**MooncakeLayerwiseConnector**:P 节点以分层方式将 KV 缓存推送到 D 节点。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:22 +msgid "" +"For step-by-step deployment and configuration, refer to the following " +"guide: " +"[https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" +msgstr "" +"有关分步部署和配置,请参考以下指南: " +"[https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:27 +msgid "How It Works" +msgstr "工作原理" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:29 +msgid "1. Design Approach" +msgstr "1. 
设计思路" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:31 +msgid "" +"Under the disaggregated-prefill, a global proxy receives external " +"requests, forwarding prefill to P nodes and decode to D nodes; the KV " +"cache (key–value cache) is exchanged between P and D nodes via peer-to-" +"peer (P2P) communication." +msgstr "" +"在解耦式预填充架构下,一个全局代理接收外部请求,将预填充请求转发给 P 节点,将解码请求转发给 D 节点;KV 缓存(键值缓存)通过点对点 (P2P) 通信在 P 节点和 D 节点之间交换。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:33 +msgid "2. Implementation Design" +msgstr "2. 实现设计" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:35 +msgid "" +"Our design diagram is shown below, illustrating the pull and push schemes" +" respectively. ![alt text](../../assets/disaggregated_prefill_pull.png) " +"![alt text](../../assets/disaggregated_prefill_push.png)" +msgstr "" +"我们的设计图如下所示,分别展示了拉取和推送方案。![alt text](../../assets/disaggregated_prefill_pull.png) ![alt text](../../assets/disaggregated_prefill_push.png)" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:35 +msgid "alt text" +msgstr "替代文本" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:39 +msgid "Mooncake Connector" +msgstr "Mooncake 连接器" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:41 +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:49 +msgid "The request is sent to the Proxy’s `_handle_completions` endpoint." +msgstr "请求被发送到代理的 `_handle_completions` 端点。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:42 +msgid "" +"The Proxy calls `select_prefiller` to choose a P node and forwards the " +"request, configuring `kv_transfer_params` with `do_remote_decode=True`, " +"`max_completion_tokens=1`, and `min_tokens=1`." +msgstr "" +"代理调用 `select_prefiller` 选择一个 P 节点并转发请求,配置 `kv_transfer_params` 为 `do_remote_decode=True`、`max_completion_tokens=1` 和 `min_tokens=1`。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:43 +msgid "" +"After the P node’s scheduler finishes prefill, `update_from_output` " +"invokes the schedule connector’s `request_finished` to defer KV cache " +"release, constructs `kv_transfer_params` with `do_remote_prefill=True`, " +"and returns to the Proxy." +msgstr "" +"P 节点的调度器完成预填充后,`update_from_output` 调用调度连接器的 `request_finished` 以延迟释放 KV 缓存,构建 `kv_transfer_params` 为 `do_remote_prefill=True`,并返回给代理。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:44 +msgid "" +"The Proxy calls `select_decoder` to choose a D node and forwards the " +"request." +msgstr "代理调用 `select_decoder` 选择一个 D 节点并转发请求。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:45 +msgid "" +"On the D node, the scheduler marks the request as " +"`RequestStatus.WAITING_FOR_REMOTE_KVS`, pre-allocates KV cache, calls " +"`kv_connector_no_forward` to pull the remote KV cache, then notifies the " +"P node to release KV cache and proceeds with decoding to return the " +"result." 
+msgstr "" +"在 D 节点上,调度器将请求标记为 `RequestStatus.WAITING_FOR_REMOTE_KVS`,预分配 KV 缓存,调用 `kv_connector_no_forward` 拉取远程 KV 缓存,然后通知 P 节点释放 KV 缓存并继续解码以返回结果。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:47 +msgid "Mooncake Layerwise Connector" +msgstr "Mooncake 分层连接器" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:50 +msgid "" +"The Proxy calls `select_decoder` to choose a D node and forwards the " +"request, configuring `kv_transfer_params` with `do_remote_prefill=True` " +"and setting the `metaserver` endpoint." +msgstr "" +"代理调用 `select_decoder` 选择一个 D 节点并转发请求,配置 `kv_transfer_params` 为 `do_remote_prefill=True` 并设置 `metaserver` 端点。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:51 +msgid "" +"On the D node, the scheduler uses `kv_transfer_params` to mark the " +"request as `RequestStatus.WAITING_FOR_REMOTE_KVS`, pre-allocates KV " +"cache, then calls `kv_connector_no_forward` to send a request to the " +"metaserver and waits for the KV cache transfer to complete." +msgstr "" +"在 D 节点上,调度器使用 `kv_transfer_params` 将请求标记为 `RequestStatus.WAITING_FOR_REMOTE_KVS`,预分配 KV 缓存,然后调用 `kv_connector_no_forward` 向元服务器发送请求并等待 KV 缓存传输完成。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:52 +msgid "" +"The Proxy’s `metaserver` endpoint receives the request, calls " +"`select_prefiller` to choose a P node, and forwards it with " +"`kv_transfer_params` set to `do_remote_decode=True`, " +"`max_completion_tokens=1`, and `min_tokens=1`." +msgstr "" +"代理的 `metaserver` 端点接收请求,调用 `select_prefiller` 选择一个 P 节点,并转发请求,设置 `kv_transfer_params` 为 `do_remote_decode=True`、`max_completion_tokens=1` 和 `min_tokens=1`。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:53 +msgid "" +"During processing, the P node’s scheduler pushes KV cache layer-wise; " +"once all layers pushing is complete, it releases the request and notifies" +" the D node to begin decoding." +msgstr "在处理过程中,P 节点的调度器逐层推送 KV 缓存;所有层推送完成后,它释放请求并通知 D 节点开始解码。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:54 +msgid "The D node performs decoding and returns the result." +msgstr "D 节点执行解码并返回结果。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:56 +msgid "3. Interface Design" +msgstr "3. 接口设计" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:58 +msgid "" +"Taking MooncakeConnector as an example, the system is organized into " +"three primary classes:" +msgstr "以 MooncakeConnector 为例,系统被组织成三个主要类:" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:60 +msgid "**MooncakeConnector**: Base class that provides core interfaces." +msgstr "**MooncakeConnector**:提供核心接口的基类。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:61 +msgid "" +"**MooncakeConnectorScheduler**: Interface for scheduling the connectors " +"within the engine core, responsible for managing KV cache transfer " +"requirements and completion." +msgstr "**MooncakeConnectorScheduler**:用于在引擎核心内调度连接器的接口,负责管理 KV 缓存传输需求和完成情况。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:62 +msgid "" +"**MooncakeConnectorWorker**: Interface for managing KV cache registration" +" and transfer in worker processes." +msgstr "**MooncakeConnectorWorker**:用于在工作进程中管理 KV 缓存注册和传输的接口。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:64 +msgid "4. Specifications Design" +msgstr "4. 
规格设计" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:66 +msgid "" +"This feature is flexible and supports various configurations, including " +"setups with MLA and GQA models. It is compatible with A2 and A3 hardware " +"configurations and facilitates scenarios involving both equal and unequal" +" TP setups across multiple P and D nodes." +msgstr "" +"此功能灵活,支持多种配置,包括使用 MLA 和 GQA 模型的设置。它与 A2 和 A3 硬件配置兼容,并支持跨多个 P 节点和 D 节点的相等和不相等 TP 设置场景。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "Feature" +msgstr "功能" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "Status" +msgstr "状态" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "A2" +msgstr "A2" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "🟢 Functional" +msgstr "🟢 功能正常" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "A3" +msgstr "A3" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "equal TP configuration" +msgstr "相等 TP 配置" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "unequal TP configuration" +msgstr "不相等 TP 配置" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "MLA" +msgstr "MLA" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md +msgid "GQA" +msgstr "GQA" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:77 +msgid "🟢 Functional: Fully operational, with ongoing optimizations." +msgstr "🟢 功能正常:完全可运行,正在进行优化。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:78 +msgid "🔵 Experimental: Experimental support, interfaces and functions may change." +msgstr "🔵 实验性:实验性支持,接口和功能可能发生变化。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:79 +msgid "🚧 WIP: Under active development, will be supported soon." +msgstr "🚧 开发中:正在积极开发,即将支持。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:80 +msgid "" +"🟡 Planned: Scheduled for future implementation (some may have open " +"PRs/RFCs)." +msgstr "🟡 计划中:计划在未来实现(部分可能已有开放的 PR/RFC)。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:81 +msgid "🔴 NO plan/Deprecated: No plan or deprecated by vLLM." +msgstr "🔴 无计划/已弃用:无计划或已被 vLLM 弃用。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:85 +msgid "DFX Analysis" +msgstr "DFX 分析" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:87 +msgid "1. Config Parameter Validation" +msgstr "1. 配置参数验证" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:89 +msgid "" +"Validate KV transfer config by checking whether the kv_connector type is " +"supported and whether kv_connector_module_path exists and is loadable. On" +" transfer failures, emit clear error logs for diagnostics." +msgstr "" +"通过检查 kv_connector 类型是否受支持以及 kv_connector_module_path 是否存在且可加载来验证 KV 传输配置。传输失败时,发出清晰的错误日志以供诊断。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:91 +msgid "2. Port Conflict Detection" +msgstr "2. 端口冲突检测" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:93 +msgid "" +"Before startup, perform a port-usage check on configured ports (e.g., " +"rpc_port, metrics_port, http_port/metaserver) by attempting to bind. If a" +" port is already in use, fail fast and log an error." 
+msgstr "启动前,通过尝试绑定来对配置的端口(例如 rpc_port、metrics_port、http_port/metaserver)进行端口使用情况检查。如果端口已被占用,快速失败并记录错误。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:95 +msgid "3. PD Ratio Validation" +msgstr "3. PD 比例验证" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:97 +msgid "" +"Under non-symmetric PD scenarios, validate the P-to-D tp ratio against " +"expected and scheduling constraints to ensure correct and reliable " +"operation." +msgstr "在非对称 PD 场景下,根据预期和调度约束验证 P 到 D 的 tp 比例,以确保正确可靠的操作。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:101 +msgid "Limitations" +msgstr "限制" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:103 +msgid "" +"Heterogeneous P and D nodes are not supported—for example, running P " +"nodes on A2 and D nodes on A3." +msgstr "不支持异构的 P 节点和 D 节点——例如,在 A2 上运行 P 节点,在 A3 上运行 D 节点。" + +#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:105 +msgid "" +"In non-symmetric TP configurations, only cases where the P nodes have a " +"higher TP degree than the D nodes and the P TP count is an integer " +"multiple of the D TP count are supported (i.e., P_tp > D_tp and P_tp % " +"D_tp = 0)." +msgstr "在非对称 TP 配置中,仅支持 P 节点的 TP 度数高于 D 节点且 P 节点的 TP 数量是 D 节点 TP 数量的整数倍的情况(即 P_tp > D_tp 且 P_tp % D_tp = 0)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po new file mode 100644 index 00000000..8ea543fd --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po @@ -0,0 +1,467 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:1 +msgid "Expert Parallelism Load Balancer (EPLB)" +msgstr "专家并行负载均衡器 (EPLB)" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:3 +msgid "Why We Need EPLB?" +msgstr "为什么需要 EPLB?" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:5 +msgid "" +"When using Expert Parallelism (EP), different experts are assigned to " +"different NPUs. Given that the load of various experts may vary depending" +" on the current workload, it is crucial to maintain balanced loads across" +" different NPUs. We adopt a redundant experts strategy by duplicating " +"heavily-loaded experts. Then, we heuristically pack these duplicated " +"experts onto NPUs to ensure load balancing across them. Moreover, thanks " +"to the group-limited expert routing used in MoE models, we also attempt " +"to place experts of the same group on the same node to reduce inter-node " +"data traffic, whenever possible." 
+msgstr "" +"在使用专家并行 (EP) 时,不同的专家被分配到不同的 NPU 上。鉴于不同专家的负载可能因当前工作负载而异,保持不同 NPU 之间的负载均衡至关重要。我们采用冗余专家策略,通过复制高负载的专家来实现。然后,我们启发式地将这些复制的专家打包到 NPU 上,以确保它们之间的负载均衡。此外,得益于 MoE 模型中使用的组限制专家路由,我们也尽可能将同一组的专家放置在同一节点上,以减少节点间的数据流量。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:7 +msgid "" +"To facilitate reproduction and deployment, vLLM Ascend supports the " +"deployed EP load balancing algorithm in `vllm_ascend/eplb/core/policy`. " +"The algorithm computes a balanced expert replication and placement plan " +"based on the estimated expert loads. Note that the exact method for " +"predicting expert loads is outside the scope of this repository. A common" +" method is to use a moving average of historical statistics." +msgstr "" +"为了方便复现和部署,vLLM Ascend 在 `vllm_ascend/eplb/core/policy` 中支持已部署的 EP 负载均衡算法。该算法根据估计的专家负载计算一个均衡的专家复制和放置计划。请注意,预测专家负载的具体方法不在本仓库的讨论范围内。一种常见的方法是使用历史统计数据的移动平均值。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:9 +msgid "![eplb](../../assets/eplb.png)" +msgstr "![eplb](../../assets/eplb.png)" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:9 +msgid "eplb" +msgstr "eplb" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:11 +msgid "How to Use EPLB?" +msgstr "如何使用 EPLB?" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:13 +msgid "" +"Please refer to the EPLB section of the user guide for detailed " +"information: [How to Use " +"EPLB](../../user_guide/feature_guide/eplb_swift_balancer.md)" +msgstr "" +"请参阅用户指南中的 EPLB 部分以获取详细信息:[如何使用 " +"EPLB](../../user_guide/feature_guide/eplb_swift_balancer.md)" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:15 +msgid "How It Works?" +msgstr "工作原理" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:17 +msgid "**EPLB Module Architecture**" +msgstr "**EPLB 模块架构**" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:40 +msgid "" +"**1. Adaptor Module** *Handles registration and adaptation for " +"different MoE model types*" +msgstr "**1. 适配器模块** *处理不同 MoE 模型类型的注册和适配*" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:43 +msgid "" +"`abstract_adaptor.py` Abstract base class defining unified registration" +" interfaces for EPLB adapters" +msgstr "`abstract_adaptor.py` 定义 EPLB 适配器统一注册接口的抽象基类" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:45 +msgid "" +"`vllm_adaptor.py` Implementation supporting Qwen3-MoE and DeepSeek " +"models, standardizing parameter handling for policy algorithms" +msgstr "`vllm_adaptor.py` 支持 Qwen3-MoE 和 DeepSeek 模型的实现,标准化策略算法的参数处理" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:48 +msgid "" +"**2. Core Module** *Implements core algorithms, updates, and " +"asynchronous processing*" +msgstr "**2. 
核心模块** *实现核心算法、更新和异步处理*" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:51 +msgid "" +"**Policy Submodule** *Load balancing algorithms with factory pattern " +"instantiation*" +msgstr "**策略子模块** *采用工厂模式实例化的负载均衡算法*" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:53 +msgid "" +"`policy_abstract.py` Abstract class for load balancing strategy " +"interfaces" +msgstr "`policy_abstract.py` 负载均衡策略接口的抽象类" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:55 +msgid "" +"`policy_default_eplb.py` Default implementation of open-source EPLB " +"paper algorithm" +msgstr "`policy_default_eplb.py` 开源 EPLB 论文算法的默认实现" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:57 +msgid "" +"`policy_swift_balancer.py` Enhanced version optimizing expert swaps for" +" low-bandwidth devices (e.g., A2)" +msgstr "`policy_swift_balancer.py` 针对低带宽设备(例如 A2)优化专家交换的增强版本" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:59 +msgid "" +"`policy_flashlb.py` Threshold-based adjustment reducing operational " +"costs through layer-wise fluctuation detection" +msgstr "`policy_flashlb.py` 基于阈值的调整,通过逐层波动检测降低操作成本" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:61 +msgid "" +"`policy_factory.py` Strategy factory for automatic algorithm " +"instantiation" +msgstr "`policy_factory.py` 用于自动算法实例化的策略工厂" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:64 +msgid "" +"`eplb_device_transfer_loader.py` Manages expert table/weight " +"transmission and updates" +msgstr "`eplb_device_transfer_loader.py` 管理专家表/权重的传输和更新" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:66 +msgid "`eplb_utils.py` Utilities for expert table initialization and mapping" +msgstr "`eplb_utils.py` 用于专家表初始化和映射的实用工具" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:68 +msgid "" +"`eplb_worker.py` Asynchronous algorithm orchestration and result " +"processing" +msgstr "`eplb_worker.py` 异步算法编排和结果处理" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:71 +msgid "**3. System Components**" +msgstr "**3. 
系统组件**" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:73 +msgid "" +"`eplb_updator.py` Central coordinator for load balancing during " +"inference workflows" +msgstr "`eplb_updator.py` 推理工作流中负载均衡的中心协调器" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:75 +msgid "`utils.py` General utilities for EPLB interface registration" +msgstr "`utils.py` EPLB 接口注册的通用实用工具" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:78 +msgid "*Key Optimizations:*" +msgstr "*关键优化点:*" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:80 +msgid "Maintained original structure while improving technical clarity" +msgstr "保持原始结构的同时提高了技术清晰度" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:81 +msgid "Standardized terminology" +msgstr "标准化术语" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:82 +msgid "Enhanced algorithm differentiation through concise descriptors" +msgstr "通过简洁的描述符增强了算法区分度" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:83 +msgid "Improved scoping through hierarchical presentation" +msgstr "通过分层展示改进了范围界定" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:84 +msgid "Preserved file/class relationships while optimizing readability" +msgstr "在优化可读性的同时保留了文件/类关系" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:86 +msgid "Default Algorithm" +msgstr "默认算法" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:88 +msgid "Hierarchical Load Balancing" +msgstr "分层负载均衡" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:90 +msgid "" +"When the number of server nodes evenly divides the number of expert " +"groups, we use the hierarchical load balancing policy to leverage group-" +"limited expert routing. We first pack the expert groups onto nodes " +"evenly, ensuring balanced loads across different nodes. Then, we " +"replicate the experts within each node. Finally, we pack the replicated " +"experts onto individual NPUs to ensure load balancing across them. The " +"hierarchical load balancing policy can be used in the prefilling stage " +"with a smaller expert-parallel size." +msgstr "" +"当服务器节点数量能整除专家组数量时,我们使用分层负载均衡策略来利用组限制专家路由。我们首先将专家组均匀地打包到节点上,确保不同节点间的负载均衡。然后,我们在每个节点内复制专家。最后,我们将复制的专家打包到各个 NPU 上,以确保它们之间的负载均衡。分层负载均衡策略可以在预填充阶段使用,此时专家并行规模较小。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:92 +msgid "Global Load Balancing" +msgstr "全局负载均衡" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:94 +msgid "" +"In other cases, we use the global load balancing policy, which replicates" +" experts globally regardless of expert groups, and packs the replicated " +"experts onto individual NPUs. This policy can be adopted in the decoding " +"stage with a larger expert-parallel size." 
+msgstr "" +"在其他情况下,我们使用全局负载均衡策略,该策略不考虑专家组,而是在全局范围内复制专家,并将复制的专家打包到各个 NPU 上。此策略可以在解码阶段采用,此时专家并行规模较大。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:96 +msgid "Add a New EPLB Policy" +msgstr "添加新的 EPLB 策略" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:98 +msgid "" +"If you want to add a new eplb policy to vllm_ascend, you must follow " +"these steps:" +msgstr "如果你想向 vllm_ascend 添加一个新的 eplb 策略,必须遵循以下步骤:" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:100 +msgid "" +"Inherit the `EplbPolicy` abstract class of `policy_abstract.py` and " +"override the `rebalance_experts` interface, ensuring consistent input " +"parameters `current_expert_table`, `expert_workload` and return types " +"`newplacement`. For example:" +msgstr "" +"继承 `policy_abstract.py` 中的 `EplbPolicy` 抽象类,并重写 `rebalance_experts` 接口,确保输入参数 " +"`current_expert_table`、`expert_workload` 和返回类型 `newplacement` 保持一致。例如:" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:126 +msgid "" +"To add a new EPLB algorithm, include the policy type and its " +"corresponding implementation class in the `PolicyFactory` of " +"`policy_factory.py`." +msgstr "要添加新的 EPLB 算法,请在 `policy_factory.py` 的 `PolicyFactory` 中包含策略类型及其对应的实现类。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:128 +msgid "Add a New MoE Model" +msgstr "添加新的 MoE 模型" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:130 +msgid "**Implementation Guide for Model Integration**" +msgstr "**模型集成实施指南**" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:132 +msgid "**Adapter File Modification**" +msgstr "**适配器文件修改**" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:133 +msgid "Inherit or modify `vllm_ascend/eplb/adaptor/vllm_adaptor.py`" +msgstr "继承或修改 `vllm_ascend/eplb/adaptor/vllm_adaptor.py`" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:134 +msgid "Add processing logic for key parameters:" +msgstr "为关键参数添加处理逻辑:" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:135 +msgid "`num_dense_layers`" +msgstr "`num_dense_layers`" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:136 +msgid "`global_expert_num`" +msgstr "`global_expert_num`" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:137 +msgid "`num_roe_layers`" +msgstr "`num_roe_layers`" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:138 +msgid "Ensure parameter synchronization in the `model_register` function." 
+msgstr "确保在 `model_register` 函数中进行参数同步。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:140 +msgid "For example:" +msgstr "例如:" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:142 +msgid "Modify `__init__` of `vllm_adaptor.py` to add a new moe model eplb params:" +msgstr "修改 `vllm_adaptor.py` 的 `__init__` 以添加新 MoE 模型的 eplb 参数:" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:150 +msgid "" +"Modify `model_register` of `vllm_adaptor.py` to register eplb params for " +"new moe model:" +msgstr "修改 `vllm_adaptor.py` 的 `model_register` 以注册新 MoE 模型的 eplb 参数:" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:157 +msgid "**MoE Feature Integration**" +msgstr "**MoE 功能集成**" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:158 +msgid "Extend `vllm_ascend/eplb/utils.py` with MoE-specific methods" +msgstr "使用 MoE 特定方法扩展 `vllm_ascend/eplb/utils.py`" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:159 +msgid "Implement required functionality for expert routing or weight management" +msgstr "实现专家路由或权重管理所需的功能" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:161 +msgid "**Registration Logic Update**" +msgstr "**注册逻辑更新**" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:162 +msgid "Add patch logic within the `model_register` function" +msgstr "在 `model_register` 函数内添加补丁逻辑" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:163 +msgid "Maintain backward compatibility with existing model types" +msgstr "保持与现有模型类型的向后兼容性" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:165 +msgid "**Validation & Testing**" +msgstr "**验证与测试**" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:166 +msgid "Verify parameter consistency across layers" +msgstr "验证跨层的参数一致性" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:167 +msgid "Test cross-device communication for expert tables" +msgstr "测试专家表的跨设备通信" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:168 +msgid "Benchmark against baseline implementations (e.g., Qwen3-MoE)" +msgstr "与基线实现(例如 Qwen3-MoE)进行基准测试" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:170 +msgid "*Key Implementation Notes:*" +msgstr "*关键实施说明:*" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:172 +msgid "Preserve existing interface contracts in abstract classes" +msgstr "在抽象类中保留现有的接口契约" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:173 +msgid "Use decorators for non-intrusive patch integration" +msgstr "使用装饰器进行非侵入式补丁集成" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:174 +msgid "Leverage `eplb_utils.py` for shared expert mapping operations" +msgstr "利用 `eplb_utils.py` 进行共享的专家映射操作" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:176 +msgid "DFX" +msgstr "DFX" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:178 +msgid "Parameter Validation" +msgstr "参数验证" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:180 +msgid "Integer Parameters" +msgstr "整数参数" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:182 +msgid "" +"All integer input parameters must explicitly specify their maximum and " +"minimum values and be subject to valid value validation. 
For example, " +"`expert_heat_collection_interval` must be greater than 0:" +msgstr "" +"所有整型输入参数必须明确指定其最大值和最小值,并接受有效值验证。例如,`expert_heat_collection_interval` 必须大于0:" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:197 +msgid "File Path" +msgstr "文件路径" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:199 +msgid "" +"The file path for EPLB must be checked for legality, such as whether the " +"file path is valid and whether it has appropriate read and write " +"permissions. For example:" +msgstr "必须检查 EPLB 文件路径的合法性,例如文件路径是否有效以及是否具有适当的读写权限。例如:" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:225 +msgid "Function Specifications" +msgstr "功能规范" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:227 +msgid "Initialization Function" +msgstr "初始化函数" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:229 +msgid "" +"All EPLB parameters must be initialized by default during initialization," +" with specified parameter types and default values for proper handling." +msgstr "所有 EPLB 参数在初始化期间必须默认初始化,并指定参数类型和默认值以便正确处理。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:231 +msgid "General Functions" +msgstr "通用函数" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:233 +msgid "" +"All method arguments must specify parameter types and default values, and" +" functions must include default return value handling for default " +"arguments. It is recommended to use `try-except` blocks to handle the " +"function body, specifying the type of exception captured and the failure " +"handling (e.g., logging exceptions or returning a failure status)." +msgstr "" +"所有方法参数必须指定参数类型和默认值,并且函数必须包含针对默认参数的默认返回值处理。建议使用 `try-except` 块来处理函数体,指定捕获的异常类型和失败处理(例如,记录异常或返回失败状态)。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:235 +msgid "Consistency" +msgstr "一致性" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:237 +msgid "Expert Map" +msgstr "专家映射" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:239 +msgid "" +"The expert map must be globally unique during initialization and update. " +"In a multi-node scenario during initialization, distributed communication" +" should be used to verify the consistency of expert maps across each " +"rank. If they are inconsistent, the user should be notified which ranks " +"have inconsistent maps. During the update process, if only a few layers " +"or the expert table of a certain rank has been changed, the updated " +"expert table must be synchronized with the EPLB's context to ensure " +"global consistency." +msgstr "" +"专家映射在初始化和更新期间必须是全局唯一的。在初始化期间的多节点场景中,应使用分布式通信来验证每个 rank 上专家映射的一致性。如果不一致,应通知用户哪些 rank 的映射不一致。在更新过程中,如果只有少数层或某个 rank 的专家表被更改,则必须将更新后的专家表与 EPLB 的上下文同步,以确保全局一致性。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:242 +msgid "Expert Weight" +msgstr "专家权重" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:244 +msgid "" +"When updating expert weights, ensure that the memory allocated for the " +"expert weights has been released, or that the expert (referring to the " +"old version) is no longer in use." 
+msgstr "更新专家权重时,确保为专家权重分配的内存已被释放,或者专家(指旧版本)不再被使用。" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:246 +msgid "Limitations" +msgstr "限制" + +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:248 +msgid "" +"Before using EPLB, start the script and add `export " +"DYNAMIC_EPLB=\"true\"`. Before performing load data collection (or " +"performance data collection), start the script and add `export " +"EXPERT_MAP_RECORD=\"true\"`." +msgstr "" +"在使用 EPLB 之前,启动脚本并添加 `export DYNAMIC_EPLB=\"true\"`。在执行负载数据收集(或性能数据收集)之前,启动脚本并添加 `export EXPERT_MAP_RECORD=\"true\"`。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/npugraph_ex.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/npugraph_ex.po new file mode 100644 index 00000000..a2268efa --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/npugraph_ex.po @@ -0,0 +1,220 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:1 +msgid "Npugraph_ex" +msgstr "Npugraph_ex" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:3 +msgid "How Does It Work?" +msgstr "工作原理" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:5 +msgid "" +"This is an optimization based on Fx graphs, which can be considered an " +"acceleration solution for the aclgraph mode." +msgstr "这是一种基于 Fx 图的优化,可视为 aclgraph 模式的一种加速方案。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:7 +msgid "You can get its code [code](https://gitcode.com/Ascend/torchair)" +msgstr "您可以在 [code](https://gitcode.com/Ascend/torchair) 获取其代码" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:9 +msgid "Default Fx Graph Optimization" +msgstr "默认 Fx 图优化" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:11 +msgid "Fx Graph pass" +msgstr "Fx 图处理过程" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:13 +msgid "" +"For the intermediate nodes of the model, replace the non-in-place " +"operators contained in the nodes with in-place operators to reduce memory" +" movement during computation and improve performance." +msgstr "对于模型的中间节点,将其包含的非原位运算符替换为原位运算符,以减少计算过程中的内存移动,提升性能。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:14 +msgid "" +"For the original input parameters of the model, if they include in-place " +"operators, Dynamo's Functionalize process will replace the in-place " +"operators with a form of non-in-place operators + copy operators. " +"npugraph_ex will reverse this process, restoring the in-place operators " +"and reducing memory movement." 
+msgstr "对于模型的原始输入参数,如果包含原位运算符,Dynamo 的 Functionalize 过程会将其替换为非原位运算符 + 复制运算符的形式。npugraph_ex 将逆转此过程,恢复原位运算符,减少内存移动。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:16 +msgid "Fx fusion pass" +msgstr "Fx 融合处理过程" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:18 +msgid "" +"npugraph_ex now provides three default operator fusion passes, and more " +"will be added in the future." +msgstr "npugraph_ex 目前提供三种默认的算子融合处理过程,未来将添加更多。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:20 +msgid "" +"Operator combinations that meet the replacement rules can be replaced " +"with the corresponding fused operators." +msgstr "符合替换规则的算子组合可以被替换为相应的融合算子。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:22 +msgid "" +"You can get the default [fusion pass " +"list](https://www.hiascend.com/document/detail/zh/Pytorch/730/modthirdparty/torchairuseguide/torchair_00017.html)" +msgstr "您可以查看默认的[融合处理过程列表](https://www.hiascend.com/document/detail/zh/Pytorch/730/modthirdparty/torchairuseguide/torchair_00017.html)" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:24 +msgid "Custom fusion pass" +msgstr "自定义融合处理过程" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:26 +msgid "" +"Users can register a custom graph fusion pass in TorchAir to modify " +"PyTorch FX graphs. The registration relies on the register_replacement " +"API." +msgstr "用户可以在 TorchAir 中注册自定义的图融合处理过程,以修改 PyTorch FX 图。注册依赖于 register_replacement API。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:28 +msgid "Below is the declaration of this API and a demo of its usage." +msgstr "以下是该 API 的声明及其使用示例。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "Parameter Name" +msgstr "参数名称" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "Input/Output" +msgstr "输入/输出" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "Explanation" +msgstr "说明" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "Is necessary" +msgstr "是否必需" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "search_fn" +msgstr "search_fn" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "Input" +msgstr "输入" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "" +"This function is the operator combination or calculation logic that you " +"want to recognize in the FX graph, such as the operator combination that " +"needs to be fused" +msgstr "此函数是您希望在 FX 图中识别的算子组合或计算逻辑,例如需要融合的算子组合" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "Yes" +msgstr "是" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "replace_fn" +msgstr "replace_fn" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "" +"When the combination corresponding to search_fn is found in the target " +"graph, this function's computation logic will replace the original " +"subgraph to achieve operator fusion or optimization." +msgstr "当在目标图中找到与 search_fn 对应的组合时,此函数的计算逻辑将替换原子图,以实现算子融合或优化。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "example_inputs" +msgstr "example_inputs" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "" +"Example input tensors used to track search_fn and replace_fn. The shape " +"and dtype of the input should match the actual scenario." 
+msgstr "用于追踪 search_fn 和 replace_fn 的示例输入张量。输入的形状和数据类型应与实际场景匹配。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "trace_fn" +msgstr "trace_fn" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "" +"By default, only the forward computation graph is tracked, which is " +"suitable for optimization during the inference phase; if training " +"scenarios need to be supported, a function that supports backward " +"tracking can be provided." +msgstr "默认情况下,仅追踪前向计算图,这适用于推理阶段的优化;如果需要支持训练场景,可以提供支持反向追踪的函数。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "No" +msgstr "否" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "extra_check" +msgstr "extra_check" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "" +"Find the extra verification function after operator fusion. The " +"function's input parameter must be a Match object from " +"torch._inductor.pattern_matcher, and it is used for further custom checks" +" on the matching result, such as checking whether the fused operators are" +" on the same stream, checking the device type, checking the input shapes," +" and so on." +msgstr "算子融合后的额外验证函数。该函数的输入参数必须是来自 torch._inductor.pattern_matcher 的 Match 对象,用于对匹配结果进行进一步的自定义检查,例如检查融合后的算子是否在同一流上、检查设备类型、检查输入形状等。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "search_fn_pattern" +msgstr "search_fn_pattern" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md +msgid "" +"A custom pattern object is generally unnecessary to provide. Its " +"definition follows the rules of the native PyTorch MultiOutputPattern " +"object. After passing this parameter, search_fn will no longer be used to" +" match operator combinations; instead, this parameter will be used " +"directly as the matching rule." +msgstr "通常无需提供自定义模式对象。其定义遵循原生 PyTorch MultiOutputPattern 对象的规则。传入此参数后,将不再使用 search_fn 来匹配算子组合,而是直接使用此参数作为匹配规则。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:43 +msgid "Usage Example" +msgstr "使用示例" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:97 +msgid "" +"The default fusion pass in npugraph_ex is also implemented based on this " +"API. You can see more examples of using this API in the vllm-ascend and " +"npugraph_ex code repositories." +msgstr "npugraph_ex 中的默认融合处理过程也是基于此 API 实现的。您可以在 vllm-ascend 和 npugraph_ex 代码仓库中查看更多使用此 API 的示例。" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:99 +msgid "DFX" +msgstr "DFX" + +#: ../../source/developer_guide/Design_Documents/npugraph_ex.md:101 +msgid "" +"By reusing the TORCH_COMPILE_DEBUG environment variable from the PyTorch " +"community, when TORCH_COMPILE_DEBUG=1 is set, it will output the FX " +"graphs throughout the entire process." +msgstr "通过复用 PyTorch 社区的 TORCH_COMPILE_DEBUG 环境变量,当设置 TORCH_COMPILE_DEBUG=1 时,将输出整个过程中的 FX 图。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/patch.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/patch.po index a3afc101..8b66e104 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/patch.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/patch.po @@ -4,245 +4,218 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/Design_Documents/patch.md:1 +#: ../../source/developer_guide/Design_Documents/patch.md:1 msgid "Patch in vLLM Ascend" -msgstr "在 vLLM Ascend 中的补丁" +msgstr "vLLM Ascend 中的补丁" -#: ../../developer_guide/Design_Documents/patch.md:3 +#: ../../source/developer_guide/Design_Documents/patch.md:3 msgid "" -"vLLM Ascend is a platform plugin for vLLM. Due to the release cycle of vLLM " -"and vLLM Ascend is different, and the hardware limitation in some case, we " -"need to patch some code in vLLM to make it compatible with vLLM Ascend." +"vLLM Ascend is a platform plugin for vLLM. Due to the different release " +"cycle of vLLM and vLLM Ascend and their hardware limitations, we need to " +"patch some code in vLLM to make it compatible with vLLM Ascend." msgstr "" "vLLM Ascend 是 vLLM 的一个平台插件。由于 vLLM 和 vLLM Ascend " -"的发布周期不同,并且在某些情况下存在硬件限制,我们需要对 vLLM 进行一些代码补丁,以使其能够兼容 vLLM Ascend。" +"的发布周期不同且存在硬件限制,我们需要对 vLLM 中的部分代码打补丁,以使其兼容 vLLM Ascend。" -#: ../../developer_guide/Design_Documents/patch.md:5 +#: ../../source/developer_guide/Design_Documents/patch.md:5 msgid "" "In vLLM Ascend code, we provide a patch module `vllm_ascend/patch` to " -"address the change for vLLM." -msgstr "在 vLLM Ascend 代码中,我们提供了一个补丁模块 `vllm_ascend/patch` 用于应对 vLLM 的变更。" +"adapt to changes in vLLM." +msgstr "在 vLLM Ascend 代码中,我们提供了一个补丁模块 `vllm_ascend/patch` 来适配 vLLM 的变更。" -#: ../../developer_guide/Design_Documents/patch.md:7 +#: ../../source/developer_guide/Design_Documents/patch.md:7 msgid "Principle" -msgstr "原理" +msgstr "原则" -#: ../../developer_guide/Design_Documents/patch.md:9 +#: ../../source/developer_guide/Design_Documents/patch.md:9 msgid "" -"We should keep in mind that Patch is not the best way to make vLLM Ascend " -"compatible. It's just a temporary solution. The best way is to contribute " -"the change to vLLM to make it compatible with vLLM Ascend originally. In " -"vLLM Ascend, we have the basic principle for Patch strategy:" +"We should keep in mind that Patch is not the best way to make vLLM Ascend" +" compatible. It's just a temporary solution. The best way is to " +"contribute the change to vLLM to make it compatible with vLLM Ascend " +"initially. In vLLM Ascend, we have the basic principle for Patch " +"strategy:" msgstr "" -"我们需要记住,Patch 不是让 vLLM 兼容 Ascend 的最佳方式,这只是一个临时的解决方案。最好的方法是将修改贡献到 vLLM 项目中,从而让" -" vLLM 原生支持 Ascend。对于 vLLM Ascend,我们对 Patch 策略有一个基本原则:" +"我们需要牢记,补丁并非实现 vLLM Ascend 兼容性的最佳方式,它只是一个临时解决方案。最佳方式是将修改贡献给 vLLM,使其原生兼容 " +"vLLM Ascend。在 vLLM Ascend 中,我们遵循以下补丁策略基本原则:" -#: ../../developer_guide/Design_Documents/patch.md:11 +#: ../../source/developer_guide/Design_Documents/patch.md:11 msgid "Less is more. Please do not patch unless it's the only way currently." 
-msgstr "少即是多。请不要打补丁,除非这是目前唯一的方法。" +msgstr "少即是多。除非是当前唯一的方法,否则请不要打补丁。" -#: ../../developer_guide/Design_Documents/patch.md:12 +#: ../../source/developer_guide/Design_Documents/patch.md:12 msgid "" "Once a patch is added, it's required to describe the future plan for " "removing the patch." -msgstr "一旦补丁被添加,必须说明将来移除该补丁的计划。" +msgstr "一旦添加补丁,必须描述未来移除该补丁的计划。" -#: ../../developer_guide/Design_Documents/patch.md:13 -msgid "Anytime, clean the patch code is welcome." -msgstr "任何时候,欢迎清理补丁代码。" +#: ../../source/developer_guide/Design_Documents/patch.md:13 +msgid "Anytime, cleaning the patch code is welcome." +msgstr "随时欢迎清理补丁代码。" -#: ../../developer_guide/Design_Documents/patch.md:15 +#: ../../source/developer_guide/Design_Documents/patch.md:15 msgid "How it works" msgstr "工作原理" -#: ../../developer_guide/Design_Documents/patch.md:17 +#: ../../source/developer_guide/Design_Documents/patch.md:17 msgid "In `vllm_ascend/patch`, you can see the code structure as follows:" -msgstr "在 `vllm_ascend/patch` 目录中,你可以看到如下代码结构:" +msgstr "在 `vllm_ascend/patch` 中,你可以看到如下代码结构:" -#: ../../developer_guide/Design_Documents/patch.md:33 +#: ../../source/developer_guide/Design_Documents/patch.md:29 msgid "" -"**platform**: The patch code in this directory is for patching the code in " -"vLLM main process. It's called by " -"`vllm_ascend/platform::NPUPlatform::pre_register_and_update` very early when" -" vLLM is initialized." +"**platform**: The patch code in this directory is for patching the code " +"in vLLM main process. It's called by " +"`vllm_ascend/platform::NPUPlatform::pre_register_and_update` very early " +"when vLLM is initialized." msgstr "" -"**platform**:此目录下的补丁代码用于修补 vLLM 主进程中的代码。当 vLLM 初始化时,会在很早的阶段由 " +"**platform**:此目录中的补丁代码用于修补 vLLM 主进程中的代码。它在 vLLM 初始化早期由 " "`vllm_ascend/platform::NPUPlatform::pre_register_and_update` 调用。" -#: ../../developer_guide/Design_Documents/patch.md:34 +#: ../../source/developer_guide/Design_Documents/patch.md:30 msgid "" -"For online mode, vLLM process calls the platform patch here " -"`vllm/vllm/engine/arg_utils.py::AsyncEngineArgs.add_cli_args` when parsing " -"the cli args." +"For online mode, vLLM process calls the platform patch in " +"`vllm/vllm/engine/arg_utils.py::AsyncEngineArgs.add_cli_args` when " +"parsing the cli args." msgstr "" "对于在线模式,vLLM 进程在解析命令行参数时,会在 " -"`vllm/vllm/engine/arg_utils.py::AsyncEngineArgs.add_cli_args` 这里调用平台补丁。" +"`vllm/vllm/engine/arg_utils.py::AsyncEngineArgs.add_cli_args` 处调用平台补丁。" -#: ../../developer_guide/Design_Documents/patch.md:35 +#: ../../source/developer_guide/Design_Documents/patch.md:31 msgid "" -"For offline mode, vLLM process calls the platform patch here " +"For offline mode, vLLM process calls the platform patch in " "`vllm/vllm/engine/arg_utils.py::EngineArgs.create_engine_config` when " "parsing the input parameters." msgstr "" -"对于离线模式,vLLM 进程在解析输入参数时,会在此处调用平台补丁 " -"`vllm/vllm/engine/arg_utils.py::EngineArgs.create_engine_config`。" +"对于离线模式,vLLM 进程在解析输入参数时,会在 " +"`vllm/vllm/engine/arg_utils.py::EngineArgs.create_engine_config` 处调用平台补丁。" -#: ../../developer_guide/Design_Documents/patch.md:36 +#: ../../source/developer_guide/Design_Documents/patch.md:32 msgid "" "**worker**: The patch code in this directory is for patching the code in " "vLLM worker process. It's called by " "`vllm_ascend/worker/worker::NPUWorker::__init__` when the vLLM worker " "process is initialized." 
msgstr "" -"**worker**:此目录中的补丁代码用于修补 vLLM worker 进程中的代码。在初始化 vLLM worker 进程时,会被 " +"**worker**:此目录中的补丁代码用于修补 vLLM worker 进程中的代码。它在 vLLM worker 进程初始化时由 " "`vllm_ascend/worker/worker::NPUWorker::__init__` 调用。" -#: ../../developer_guide/Design_Documents/patch.md:37 +#: ../../source/developer_guide/Design_Documents/patch.md:33 msgid "" -"For both online and offline mode, vLLM engine core process calls the worker " -"patch here `vllm/vllm/worker/worker_base.py::WorkerWrapperBase.init_worker` " -"when initializing the worker process." +"For both online and offline mode, vLLM engine core process calls the " +"worker patch in " +"`vllm/vllm/worker/worker_base.py::WorkerWrapperBase.init_worker` when " +"initializing the worker process." msgstr "" -"无论是在线还是离线模式,vLLM 引擎核心进程在初始化 worker 进程时,都会在这里调用 worker " -"补丁:`vllm/vllm/worker/worker_base.py::WorkerWrapperBase.init_worker`。" +"对于在线和离线模式,vLLM 引擎核心进程在初始化 worker 进程时,会在 " +"`vllm/vllm/worker/worker_base.py::WorkerWrapperBase.init_worker` 处调用 worker 补丁。" -#: ../../developer_guide/Design_Documents/patch.md:39 -msgid "" -"In both **platform** and **worker** folder, there are several patch modules." -" They are used for patching different version of vLLM." -msgstr "在 **platform** 和 **worker** 文件夹中都有一些补丁模块。它们用于修补不同版本的 vLLM。" - -#: ../../developer_guide/Design_Documents/patch.md:41 -msgid "" -"`patch_0_9_2`: This module is used for patching vLLM 0.9.2. The version is " -"always the nearest version of vLLM. Once vLLM is released, we will drop this" -" patch module and bump to a new version. For example, `patch_0_9_2` is used " -"for patching vLLM 0.9.2." -msgstr "" -"`patch_0_9_2`:此模块用于修补 vLLM 0.9.2。该版本始终对应于 vLLM 的最近版本。一旦 vLLM " -"发布新版本,我们将移除此补丁模块并升级到新版本。例如,`patch_0_9_2` 就是用于修补 vLLM 0.9.2 的。" - -#: ../../developer_guide/Design_Documents/patch.md:42 -msgid "" -"`patch_main`: This module is used for patching the code in vLLM main branch." -msgstr "`patch_main`:该模块用于修补 vLLM 主分支代码。" - -#: ../../developer_guide/Design_Documents/patch.md:43 -msgid "" -"`patch_common`: This module is used for patching both vLLM 0.9.2 and vLLM " -"main branch." -msgstr "`patch_common`:此模块用于同时修补 vLLM 0.9.2 版本和 vLLM 主分支。" - -#: ../../developer_guide/Design_Documents/patch.md:45 +#: ../../source/developer_guide/Design_Documents/patch.md:35 msgid "How to write a patch" -msgstr "如何撰写补丁" +msgstr "如何编写补丁" -#: ../../developer_guide/Design_Documents/patch.md:47 +#: ../../source/developer_guide/Design_Documents/patch.md:37 msgid "" -"Before writing a patch, following the principle above, we should patch the " -"least code. If it's necessary, we can patch the code in either **platform** " -"and **worker** folder. Here is an example to patch `distributed` module in " -"vLLM." +"Before writing a patch, following the principle above, we should patch " +"the least code. If it's necessary, we can patch the code in either " +"**platform** or **worker** folder. Here is an example to patch " +"`distributed` module in vLLM." msgstr "" -"在编写补丁之前,遵循上述原则,我们应尽量修改最少的代码。如果有必要,我们可以修改 **platform** 和 **worker** " -"文件夹中的代码。下面是一个在 vLLM 中修改 `distributed` 模块的示例。" +"在编写补丁前,遵循上述原则,我们应尽可能少地修改代码。如果确有必要,我们可以在 **platform** 或 **worker** " +"文件夹中打补丁。以下是一个修补 vLLM 中 `distributed` 模块的示例。" -#: ../../developer_guide/Design_Documents/patch.md:49 +#: ../../source/developer_guide/Design_Documents/patch.md:39 msgid "" -"Decide which version of vLLM we should patch. For example, after analysis, " -"here we want to patch both 0.9.2 and main of vLLM." 
-msgstr "决定我们应该修补哪个版本的 vLLM。例如,经过分析后,这里我们想要同时修补 vLLM 的 0.9.2 版和主分支(main)。" +"Decide which version of vLLM we should patch. For example, after " +"analysis, here we want to patch both `0.10.0` and `main` of vLLM." +msgstr "确定我们需要修补哪个版本的 vLLM。例如,经过分析,这里我们想要同时修补 vLLM 的 `0.10.0` 版本和 `main` 分支。" -#: ../../developer_guide/Design_Documents/patch.md:50 +#: ../../source/developer_guide/Design_Documents/patch.md:40 msgid "" "Decide which process we should patch. For example, here `distributed` " "belongs to the vLLM main process, so we should patch `platform`." -msgstr "决定我们应该修补哪个进程。例如,这里 `distributed` 属于 vLLM 主进程,所以我们应该修补 `platform`。" +msgstr "确定我们需要修补哪个进程。例如,这里的 `distributed` 属于 vLLM 主进程,因此我们应该修补 `platform`。" -#: ../../developer_guide/Design_Documents/patch.md:51 -#, python-brace-format +#: ../../source/developer_guide/Design_Documents/patch.md:41 msgid "" "Create the patch file in the right folder. The file should be named as " "`patch_{module_name}.py`. The example here is " -"`vllm_ascend/patch/platform/patch_common/patch_distributed.py`." +"`vllm_ascend/patch/platform/patch_distributed.py`." msgstr "" "在正确的文件夹中创建补丁文件。文件应命名为 `patch_{module_name}.py`。此处的示例是 " -"`vllm_ascend/patch/platform/patch_common/patch_distributed.py`。" +"`vllm_ascend/patch/platform/patch_distributed.py`。" -#: ../../developer_guide/Design_Documents/patch.md:52 +#: ../../source/developer_guide/Design_Documents/patch.md:42 msgid "Write your patch code in the new file. Here is an example:" msgstr "在新文件中编写你的补丁代码。以下是一个示例:" -#: ../../developer_guide/Design_Documents/patch.md:62 +#: ../../source/developer_guide/Design_Documents/patch.md:54 msgid "" "Import the patch file in `__init__.py`. In this example, add `import " -"vllm_ascend.patch.platform.patch_common.patch_distributed` into " -"`vllm_ascend/patch/platform/patch_common/__init__.py`." +"vllm_ascend.patch.platform.patch_distributed` into " +"`vllm_ascend/patch/platform/__init__.py`." msgstr "" -"在 `__init__.py` 中导入补丁文件。在这个示例中,将 `import " -"vllm_ascend.patch.platform.patch_common.patch_distributed` 添加到 " -"`vllm_ascend/patch/platform/patch_common/__init__.py` 中。" +"在 `__init__.py` 中导入补丁文件。在此示例中,将 `import " +"vllm_ascend.patch.platform.patch_distributed` 添加到 `vllm_ascend/patch/platform/__init__.py` 中。" -#: ../../developer_guide/Design_Documents/patch.md:63 +#: ../../source/developer_guide/Design_Documents/patch.md:55 msgid "" "Add the description of the patch in `vllm_ascend/patch/__init__.py`. The " "description format is as follows:" -msgstr "在 `vllm_ascend/patch/__init__.py` 中添加补丁的描述。描述格式如下:" +msgstr "在 `vllm_ascend/patch/__init__.py` 中添加补丁描述。描述格式如下:" -#: ../../developer_guide/Design_Documents/patch.md:77 +#: ../../source/developer_guide/Design_Documents/patch.md:71 msgid "" -"Add the Unit Test and E2E Test. Any newly added code in vLLM Ascend should " -"contain the Unit Test and E2E Test as well. You can find more details in " -"[test guide](../contribution/testing.md)" +"Add the Unit Test and E2E Test. Any newly added code in vLLM Ascend " +"should contain the Unit Test and E2E Test as well. 
You can find more " +"details in [test guide](../contribution/testing.md)" msgstr "" -"添加单元测试和端到端(E2E)测试。在 vLLM Ascend 中新增的任何代码也应包含单元测试和端到端测试。更多详情请参见 " -"[测试指南](../contribution/testing.md)。" +"添加单元测试和端到端测试。vLLM Ascend 中任何新增的代码都应包含单元测试和端到端测试。更多详情请参阅 [测试指南]" +"(../contribution/testing.md)。" -#: ../../developer_guide/Design_Documents/patch.md:80 -msgid "Limitation" +#: ../../source/developer_guide/Design_Documents/patch.md:73 +msgid "Limitations" msgstr "限制" -#: ../../developer_guide/Design_Documents/patch.md:81 +#: ../../source/developer_guide/Design_Documents/patch.md:75 msgid "" -"In V1 Engine, vLLM starts three kinds of process: Main process, EngineCore " -"process and Worker process. Now vLLM Ascend only support patch the code in " -"Main process and Worker process by default. If you want to patch the code " -"runs in EngineCore process, you should patch EngineCore process entirely " -"during setup, the entry code is here `vllm.v1.engine.core`. Please override " -"`EngineCoreProc` and `DPEngineCoreProc` entirely." +"In V1 Engine, vLLM starts three kinds of processes: Main process, " +"EngineCore process and Worker process. Now vLLM Ascend can only patch the" +" code in Main process and Worker process by default. If you want to patch" +" the code running in EngineCore process, you should patch EngineCore " +"process entirely during setup. Find the entire code in " +"`vllm.v1.engine.core`. Please override `EngineCoreProc` and " +"`DPEngineCoreProc` entirely." msgstr "" -"在 V1 引擎中,vLLM 会启动三种类型的进程:主进程、EngineCore 进程和 Worker 进程。现在 vLLM Ascend " -"默认只支持在主进程和 Worker 进程中打补丁代码。如果你想要在 EngineCore 进程中打补丁,你需要在设置阶段对 EngineCore " -"进程整体打补丁,入口代码在 `vllm.v1.engine.core`。请完全重写 `EngineCoreProc` 和 " -"`DPEngineCoreProc`。" +"在 V1 引擎中,vLLM 启动三种进程:主进程、EngineCore 进程和 Worker 进程。目前 vLLM Ascend " +"默认只能修补主进程和 Worker 进程中的代码。如果你想修补 EngineCore 进程中运行的代码,你需要在设置阶段完全修补 EngineCore " +"进程。相关完整代码位于 `vllm.v1.engine.core`。请完全重写 `EngineCoreProc` 和 `DPEngineCoreProc`。" -#: ../../developer_guide/Design_Documents/patch.md:82 +#: ../../source/developer_guide/Design_Documents/patch.md:76 msgid "" -"If you are running an edited vLLM code, the version of the vLLM may be " -"changed automatically. For example, if you runs an edited vLLM based on " -"v0.9.n, the version of vLLM may be change to v0.9.nxxx, in this case, the " -"patch for v0.9.n in vLLM Ascend would not work as expect, because that vLLM " -"Ascend can't distinguish the version of vLLM you're using. In this case, you" -" can set the environment variable `VLLM_VERSION` to specify the version of " -"vLLM you're using, then the patch for v0.9.2 should work." +"If you are running edited vLLM code, the version of vLLM may be changed " +"automatically. For example, if you run the edited vLLM based on v0.9.n, " +"the version of vLLM may be changed to v0.9.nxxx. In this case, the patch " +"for v0.9.n in vLLM Ascend would not work as expected, because vLLM Ascend" +" can't distinguish the version of the vLLM you're using. In this case, " +"you can set the environment variable `VLLM_VERSION` to specify the " +"version of the vLLM you're using, and then the patch for v0.10.0 should " +"work." 
msgstr "" -"如果你运行的是经过编辑的 vLLM 代码,vLLM 的版本可能会被自动更改。例如,如果你基于 v0.9.n 运行了编辑后的 vLLM,vLLM " -"的版本可能会变为 v0.9.nxxx,在这种情况下,vLLM Ascend 的 v0.9.n 补丁将无法正常工作,因为 vLLM Ascend " -"无法区分你所使用的 vLLM 版本。这时,你可以设置环境变量 `VLLM_VERSION` 来指定你所使用的 vLLM 版本,这样对 v0.9.2 " -"的补丁就应该可以正常工作。" +"如果你运行的是经过编辑的 vLLM 代码,vLLM 的版本可能会自动更改。例如,如果你基于 v0.9.n 运行编辑后的 vLLM,vLLM " +"的版本可能会变为 v0.9.nxxx。在这种情况下,vLLM Ascend 中针对 v0.9.n 的补丁将无法按预期工作,因为 vLLM Ascend " +"无法区分你正在使用的 vLLM 版本。此时,你可以设置环境变量 `VLLM_VERSION` 来指定你使用的 vLLM 版本,这样针对 v0.10.0 " +"的补丁就应该能正常工作了。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/quantization.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/quantization.po new file mode 100644 index 00000000..dbe5a08d --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/quantization.po @@ -0,0 +1,359 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/Design_Documents/quantization.md:1 +msgid "Quantization Adaptation Guide" +msgstr "量化适配指南" + +#: ../../source/developer_guide/Design_Documents/quantization.md:3 +msgid "" +"This document provides guidance for adapting quantization algorithms and " +"models related to **ModelSlim**." +msgstr "本文档为适配与 **ModelSlim** 相关的量化算法和模型提供指导。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:5 +msgid "Quantization Feature Introduction" +msgstr "量化特性介绍" + +#: ../../source/developer_guide/Design_Documents/quantization.md:7 +msgid "Quantization Inference Process" +msgstr "量化推理流程" + +#: ../../source/developer_guide/Design_Documents/quantization.md:9 +msgid "" +"The current process for registering and obtaining quantization methods in" +" vLLM Ascend is as follows:" +msgstr "当前 vLLM Ascend 中注册和获取量化方法的流程如下:" + +#: ../../source/developer_guide/Design_Documents/quantization.md:11 +msgid "![get_quant_method](../../assets/quantization/get_quant_method.png)" +msgstr "![get_quant_method](../../assets/quantization/get_quant_method.png)" + +#: ../../source/developer_guide/Design_Documents/quantization.md:11 +msgid "get_quant_method" +msgstr "get_quant_method" + +#: ../../source/developer_guide/Design_Documents/quantization.md:13 +msgid "" +"vLLM Ascend registers a custom Ascend quantization method. By configuring" +" the `--quantization ascend` parameter (or `quantization=\"ascend\"` for " +"offline), the quantization feature is enabled. When constructing the " +"`quant_config`, the registered `AscendModelSlimConfig` is initialized and" +" `get_quant_method` is called to obtain the quantization method " +"corresponding to each weight part, stored in the `quant_method` " +"attribute." 
+msgstr "vLLM Ascend 注册了一个自定义的 Ascend 量化方法。通过配置 `--quantization ascend` 参数(或离线时使用 `quantization=\"ascend\"`),即可启用量化功能。在构建 `quant_config` 时,会初始化已注册的 `AscendModelSlimConfig`,并调用 `get_quant_method` 来获取每个权重部分对应的量化方法,存储在 `quant_method` 属性中。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:15 +msgid "" +"Currently supported quantization methods include `AscendLinearMethod`, " +"`AscendFusedMoEMethod`, `AscendEmbeddingMethod`, and their corresponding " +"non-quantized methods:" +msgstr "当前支持的量化方法包括 `AscendLinearMethod`、`AscendFusedMoEMethod`、`AscendEmbeddingMethod` 及其对应的非量化方法:" + +#: ../../source/developer_guide/Design_Documents/quantization.md:17 +msgid "![quant_methods_overview](../../assets/quantization/quant_methods_overview.png)" +msgstr "![quant_methods_overview](../../assets/quantization/quant_methods_overview.png)" + +#: ../../source/developer_guide/Design_Documents/quantization.md:17 +msgid "quant_methods_overview" +msgstr "quant_methods_overview" + +#: ../../source/developer_guide/Design_Documents/quantization.md:19 +msgid "" +"The quantization method base class defined by vLLM and the overall call " +"flow of quantization methods are as follows:" +msgstr "vLLM 定义的量化方法基类及量化方法的整体调用流程如下:" + +#: ../../source/developer_guide/Design_Documents/quantization.md:21 +msgid "![quant_method_call_flow](../../assets/quantization/quant_method_call_flow.png)" +msgstr "![quant_method_call_flow](../../assets/quantization/quant_method_call_flow.png)" + +#: ../../source/developer_guide/Design_Documents/quantization.md:21 +msgid "quant_method_call_flow" +msgstr "quant_method_call_flow" + +#: ../../source/developer_guide/Design_Documents/quantization.md:23 +msgid "" +"The `embedding` method is generally not implemented for quantization, " +"focusing only on the other three methods." +msgstr "`embedding` 方法通常不实现量化,仅关注其他三种方法。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:25 +msgid "" +"The `create_weights` method is used for weight initialization; the " +"`process_weights_after_loading` method is used for weight post-" +"processing, such as transposition, format conversion, data type " +"conversion, etc.; the `apply` method is used to perform activation " +"quantization and quantized matrix multiplication calculations during the " +"forward process." +msgstr "`create_weights` 方法用于权重初始化;`process_weights_after_loading` 方法用于权重后处理,例如转置、格式转换、数据类型转换等;`apply` 方法用于在前向传播过程中执行激活量化和量化矩阵乘法计算。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:27 +msgid "" +"We need to implement the `create_weights`, " +"`process_weights_after_loading`, and `apply` methods for different " +"**layers** (**attention**, **mlp**, **moe**)." +msgstr "我们需要为不同的**层**(**attention**、**mlp**、**moe**)实现 `create_weights`、`process_weights_after_loading` 和 `apply` 方法。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:29 +msgid "" +"**Supplement**: When loading the model, the quantized model's description" +" file **quant_model_description.json** needs to be read. This file " +"describes the quantization configuration and parameters for each part of " +"the model weights, for example:" +msgstr "**补充说明**:加载模型时,需要读取量化模型的描述文件 **quant_model_description.json**。该文件描述了模型各部分权重的量化配置和参数,例如:" + +#: ../../source/developer_guide/Design_Documents/quantization.md:49 +msgid "" +"Based on the above content, we present a brief description of the " +"adaptation process for quantization algorithms and quantized models." 
+msgstr "基于以上内容,我们对量化算法和量化模型的适配过程进行简要描述。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:51 +msgid "Quantization Algorithm Adaptation" +msgstr "量化算法适配" + +#: ../../source/developer_guide/Design_Documents/quantization.md:53 +msgid "" +"**Step 1: Algorithm Design**. Define the algorithm ID (e.g., " +"`W4A8_DYNAMIC`), determine supported layers (linear, moe, attention), and" +" design the quantization scheme (static/dynamic, " +"pertensor/perchannel/pergroup)." +msgstr "**步骤 1:算法设计**。定义算法 ID(例如 `W4A8_DYNAMIC`),确定支持的层(linear、moe、attention),并设计量化方案(静态/动态、pertensor/perchannel/pergroup)。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:54 +msgid "" +"**Step 2: Registration**. Use the `@register_scheme` decorator in " +"`vllm_ascend/quantization/methods/registry.py` to register your " +"quantization scheme class." +msgstr "**步骤 2:注册**。在 `vllm_ascend/quantization/methods/registry.py` 中使用 `@register_scheme` 装饰器注册您的量化方案类。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:68 +msgid "" +"**Step 3: Implementation**. Create an algorithm implementation file, such" +" as `vllm_ascend/quantization/methods/w4a8.py`, and implement the method " +"class and logic." +msgstr "**步骤 3:实现**。创建一个算法实现文件,例如 `vllm_ascend/quantization/methods/w4a8.py`,并实现方法类和逻辑。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:69 +msgid "" +"**Step 4: Testing**. Use your algorithm to generate quantization " +"configurations and verify correctness and performance on target models " +"and hardware." +msgstr "**步骤 4:测试**。使用您的算法生成量化配置,并在目标模型和硬件上验证正确性和性能。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:71 +msgid "Quantized Model Adaptation" +msgstr "量化模型适配" + +#: ../../source/developer_guide/Design_Documents/quantization.md:73 +msgid "" +"Adapting a new quantized model requires ensuring the following three " +"points:" +msgstr "适配一个新的量化模型需要确保以下三点:" + +#: ../../source/developer_guide/Design_Documents/quantization.md:75 +msgid "The original model has been successfully adapted in `vLLM Ascend`." +msgstr "原始模型已在 `vLLM Ascend` 中成功适配。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:76 +msgid "" +"**Fused Module Mapping**: Add the model's `model_type` to " +"`packed_modules_model_mapping` in " +"`vllm_ascend/quantization/modelslim_config.py` (e.g., `qkv_proj`, " +"`gate_up_proj`, `experts`) to ensure sharding consistency and correct " +"loading." +msgstr "**融合模块映射**:将模型的 `model_type` 添加到 `vllm_ascend/quantization/modelslim_config.py` 中的 `packed_modules_model_mapping`(例如 `qkv_proj`、`gate_up_proj`、`experts`),以确保分片一致性和正确加载。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:96 +msgid "" +"All quantization algorithms used by the quantized model have been " +"integrated into the `quantization` module." +msgstr "量化模型使用的所有量化算法都已集成到 `quantization` 模块中。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:98 +msgid "Currently Supported Quantization Algorithms" +msgstr "当前支持的量化算法" + +#: ../../source/developer_guide/Design_Documents/quantization.md:100 +msgid "" +"vLLM Ascend supports multiple quantization algorithms. 
The following " +"table provides an overview of each quantization algorithm based on the " +"implementation in the `vllm_ascend.quantization` module:" +msgstr "vLLM Ascend 支持多种量化算法。下表基于 `vllm_ascend.quantization` 模块中的实现,概述了每种量化算法:" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Algorithm" +msgstr "算法" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Weight" +msgstr "权重" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Activation" +msgstr "激活" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Weight Granularity" +msgstr "权重粒度" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Activation Granularity" +msgstr "激活粒度" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Type" +msgstr "类型" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Description" +msgstr "描述" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "`W4A16`" +msgstr "`W4A16`" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "INT4" +msgstr "INT4" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "FP16/BF16" +msgstr "FP16/BF16" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Per-Group" +msgstr "Per-Group" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Per-Tensor" +msgstr "Per-Tensor" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Static" +msgstr "静态" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "" +"4-bit weight quantization with 16-bit activation precision, specifically " +"designed for MoE model expert layers, supporting int32 format weight " +"packing" +msgstr "4位权重量化,16位激活精度,专为 MoE 模型专家层设计,支持 int32 格式权重打包" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "`W8A16`" +msgstr "`W8A16`" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "INT8" +msgstr "INT8" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Per-Channel" +msgstr "Per-Channel" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "" +"8-bit weight quantization with 16-bit activation precision, balancing " +"accuracy and performance, suitable for linear layers" +msgstr "8位权重量化,16位激活精度,平衡精度与性能,适用于线性层" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "`W8A8`" +msgstr "`W8A8`" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "" +"Static activation quantization, suitable for scenarios requiring high " +"precision" +msgstr "静态激活量化,适用于需要高精度的场景" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "`W8A8_DYNAMIC`" +msgstr "`W8A8_DYNAMIC`" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Per-Token" +msgstr "Per-Token" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Dynamic" +msgstr "动态" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Dynamic activation quantization with per-token scaling factor calculation" +msgstr "动态激活量化,按 token 计算缩放因子" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "`W4A8_DYNAMIC`" +msgstr "`W4A8_DYNAMIC`" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "" +"Supports both direct per-channel quantization to 4-bit and two-step " +"quantization (per-channel to 8-bit then per-group to 
4-bit)" +msgstr "支持直接按通道量化到4位,以及两步量化(先按通道量化到8位,再按组量化到4位)" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "`W4A4_FLATQUANT_DYNAMIC`" +msgstr "`W4A4_FLATQUANT_DYNAMIC`" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "" +"Uses FlatQuant for activation distribution smoothing before 4-bit dynamic" +" quantization, with additional matrix multiplications for precision " +"preservation" +msgstr "在4位动态量化前使用 FlatQuant 平滑激活分布,并通过额外的矩阵乘法来保持精度" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "`W8A8_MIX`" +msgstr "`W8A8_MIX`" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Per-Tensor/Token" +msgstr "Per-Tensor/Token" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "Mixed" +msgstr "混合" + +#: ../../source/developer_guide/Design_Documents/quantization.md +msgid "" +"PD Colocation Scenario uses dynamic quantization for both P node and D " +"node; PD Disaggregation Scenario uses dynamic quantization for P node and" +" static for D node" +msgstr "PD 共部署场景下,P节点和D节点均使用动态量化;PD 分离部署场景下,P节点使用动态量化,D节点使用静态量化" + +#: ../../source/developer_guide/Design_Documents/quantization.md:112 +msgid "" +"**Static vs Dynamic:** Static quantization uses pre-computed scaling " +"factors with better performance, while dynamic quantization computes " +"scaling factors on-the-fly for each token/activation tensor with higher " +"precision." +msgstr "**静态与动态:** 静态量化使用预计算的缩放因子,性能更好;而动态量化则为每个 token/激活张量实时计算缩放因子,精度更高。" + +#: ../../source/developer_guide/Design_Documents/quantization.md:114 +msgid "" +"**Granularity:** Refers to the scope of scaling factor computation (e.g.," +" per-tensor, per-channel, per-group)." +msgstr "**粒度:** 指缩放因子计算的范围(例如,per-tensor、per-channel、per-group)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po index 9e457117..ddd31a2a 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po @@ -4,184 +4,178 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/contribution/index.md:107 +#: ../../source/developer_guide/contribution/index.md:108 msgid "Index" msgstr "索引" -#: ../../developer_guide/contribution/index.md:1 +#: ../../source/developer_guide/contribution/index.md:1 msgid "Contributing" -msgstr "贡献" +msgstr "贡献指南" -#: ../../developer_guide/contribution/index.md:3 -msgid "Building and testing" +#: ../../source/developer_guide/contribution/index.md:3 +msgid "Building and Testing" msgstr "构建与测试" -#: ../../developer_guide/contribution/index.md:4 +#: ../../source/developer_guide/contribution/index.md:5 msgid "" -"It's recommended to set up a local development environment to build and test" -" before you submit a PR." -msgstr "建议先搭建本地开发环境来进行构建和测试,再提交 PR。" +"It's recommended to set up a local development environment to build vllm-" +"ascend and run tests before you submit a PR." +msgstr "建议在提交 PR 之前,先搭建本地开发环境来构建 vllm-ascend 并运行测试。" -#: ../../developer_guide/contribution/index.md:7 -msgid "Setup development environment" -msgstr "搭建开发环境" +#: ../../source/developer_guide/contribution/index.md:8 +msgid "Set up a development environment" +msgstr "设置开发环境" -#: ../../developer_guide/contribution/index.md:9 +#: ../../source/developer_guide/contribution/index.md:10 msgid "" "Theoretically, the vllm-ascend build is only supported on Linux because " "`vllm-ascend` dependency `torch_npu` only supports Linux." -msgstr "" -"理论上,vllm-ascend 构建仅支持 Linux,因为 `vllm-ascend` 的依赖项 `torch_npu` 只支持 Linux。" +msgstr "理论上,vllm-ascend 的构建仅支持 Linux,因为其依赖项 `torch_npu` 仅支持 Linux。" -#: ../../developer_guide/contribution/index.md:12 +#: ../../source/developer_guide/contribution/index.md:13 msgid "" -"But you can still set up dev env on Linux/Windows/macOS for linting and " -"basic test as following commands:" -msgstr "但你仍然可以在 Linux/Windows/macOS 上按照以下命令设置开发环境,用于代码规约检查和基本测试:" +"But you can still set up a development environment on Linux/Windows/macOS" +" for linting and running basic tests." 
+msgstr "但你仍然可以在 Linux/Windows/macOS 上设置开发环境,用于代码规范检查和运行基本测试。" -#: ../../developer_guide/contribution/index.md:15 +#: ../../source/developer_guide/contribution/index.md:16 msgid "Run lint locally" -msgstr "在本地运行 lint" +msgstr "本地运行代码检查" -#: ../../developer_guide/contribution/index.md:33 +#: ../../source/developer_guide/contribution/index.md:35 msgid "Run CI locally" -msgstr "本地运行CI" +msgstr "本地运行 CI" -#: ../../developer_guide/contribution/index.md:35 -msgid "After complete \"Run lint\" setup, you can run CI locally:" -msgstr "在完成“运行 lint”设置后,你可以在本地运行 CI:" +#: ../../source/developer_guide/contribution/index.md:37 +msgid "After completing \"Run lint\" setup, you can run CI locally:" +msgstr "完成“运行代码检查”设置后,你可以在本地运行 CI:" -#: ../../developer_guide/contribution/index.md:61 +#: ../../source/developer_guide/contribution/index.md:63 msgid "Submit the commit" -msgstr "提交该提交" +msgstr "提交更改" -#: ../../developer_guide/contribution/index.md:68 -msgid "" -"🎉 Congratulations! You have completed the development environment setup." -msgstr "🎉 恭喜!你已经完成了开发环境的搭建。" +#: ../../source/developer_guide/contribution/index.md:70 +msgid "🎉 Congratulations! You have completed the development environment setup." +msgstr "🎉 恭喜!您已完成开发环境的设置。" -#: ../../developer_guide/contribution/index.md:70 -msgid "Test locally" +#: ../../source/developer_guide/contribution/index.md:72 +msgid "Testing locally" msgstr "本地测试" -#: ../../developer_guide/contribution/index.md:72 +#: ../../source/developer_guide/contribution/index.md:74 msgid "" -"You can refer to [Testing](./testing.md) doc to help you setup testing " -"environment and running tests locally." -msgstr "你可以参考 [测试](./testing.md) 文档,帮助你搭建测试环境并在本地运行测试。" +"You can refer to [Testing](./testing.md) to set up a testing environment" +" and running tests locally." +msgstr "你可以参考 [测试](./testing.md) 文档来设置测试环境并在本地运行测试。" -#: ../../developer_guide/contribution/index.md:74 +#: ../../source/developer_guide/contribution/index.md:76 msgid "DCO and Signed-off-by" -msgstr "DCO 和签名确认" +msgstr "DCO 与签署确认" -#: ../../developer_guide/contribution/index.md:76 +#: ../../source/developer_guide/contribution/index.md:78 msgid "" "When contributing changes to this project, you must agree to the DCO. " "Commits must include a `Signed-off-by:` header which certifies agreement " "with the terms of the DCO." -msgstr "当为本项目贡献更改时,您必须同意 DCO。提交必须包含 `Signed-off-by:` 头部,以证明您同意 DCO 的条款。" +msgstr "向本项目贡献更改时,您必须同意 DCO。提交必须包含 `Signed-off-by:` 标头,以证明您同意 DCO 的条款。" -#: ../../developer_guide/contribution/index.md:78 +#: ../../source/developer_guide/contribution/index.md:80 msgid "Using `-s` with `git commit` will automatically add this header." -msgstr "在使用 `git commit` 时加上 `-s` 参数会自动添加这个头部信息。" +msgstr "在 `git commit` 命令中使用 `-s` 参数会自动添加此标头。" -#: ../../developer_guide/contribution/index.md:80 +#: ../../source/developer_guide/contribution/index.md:82 msgid "PR Title and Classification" msgstr "PR 标题与分类" -#: ../../developer_guide/contribution/index.md:82 +#: ../../source/developer_guide/contribution/index.md:84 msgid "" "Only specific types of PRs will be reviewed. The PR title is prefixed " "appropriately to indicate the type of change. Please use one of the " "following:" -msgstr "只有特定类型的 PR 会被审核。PR 标题应使用合适的前缀以指明更改类型。请使用以下之一:" +msgstr "只有特定类型的 PR 会被审核。PR 标题应使用适当的前缀来指明更改类型。请使用以下前缀之一:" -#: ../../developer_guide/contribution/index.md:84 +#: ../../source/developer_guide/contribution/index.md:86 msgid "`[Attention]` for new features or optimization in attention." 
-msgstr "`[Attention]` 用于注意力机制中新特性或优化。" +msgstr "`[Attention]` 用于注意力机制的新功能或优化。" -#: ../../developer_guide/contribution/index.md:85 +#: ../../source/developer_guide/contribution/index.md:87 msgid "`[Communicator]` for new features or optimization in communicators." -msgstr "`[Communicator]` 适用于通信器中的新特性或优化。" +msgstr "`[Communicator]` 用于通信器的新功能或优化。" -#: ../../developer_guide/contribution/index.md:86 +#: ../../source/developer_guide/contribution/index.md:88 msgid "`[ModelRunner]` for new features or optimization in model runner." -msgstr "`[ModelRunner]` 用于模型运行器中的新功能或优化。" +msgstr "`[ModelRunner]` 用于模型运行器的新功能或优化。" -#: ../../developer_guide/contribution/index.md:87 +#: ../../source/developer_guide/contribution/index.md:89 msgid "`[Platform]` for new features or optimization in platform." -msgstr "`[Platform]` 用于平台中新功能或优化。" +msgstr "`[Platform]` 用于平台的新功能或优化。" -#: ../../developer_guide/contribution/index.md:88 +#: ../../source/developer_guide/contribution/index.md:90 msgid "`[Worker]` for new features or optimization in worker." -msgstr "`[Worker]` 用于 worker 的新功能或优化。" +msgstr "`[Worker]` 用于工作器的新功能或优化。" -#: ../../developer_guide/contribution/index.md:89 +#: ../../source/developer_guide/contribution/index.md:91 msgid "" "`[Core]` for new features or optimization in the core vllm-ascend logic " "(such as platform, attention, communicators, model runner)" -msgstr "`[Core]` 用于核心 vllm-ascend 逻辑中的新特性或优化(例如平台、注意力机制、通信器、模型运行器)。" +msgstr "`[Core]` 用于核心 vllm-ascend 逻辑中的新功能或优化(例如平台、注意力机制、通信器、模型运行器)。" -#: ../../developer_guide/contribution/index.md:90 -msgid "`[Kernel]` changes affecting compute kernels and ops." -msgstr "`[Kernel]` 影响计算内核和操作的更改。" +#: ../../source/developer_guide/contribution/index.md:92 +msgid "`[Kernel]` for changes affecting compute kernels and ops." +msgstr "`[Kernel]` 用于影响计算内核和操作的更改。" -#: ../../developer_guide/contribution/index.md:91 +#: ../../source/developer_guide/contribution/index.md:93 msgid "`[Bugfix]` for bug fixes." -msgstr "`[Bugfix]` 用于表示错误修复。" +msgstr "`[Bugfix]` 用于错误修复。" -#: ../../developer_guide/contribution/index.md:92 +#: ../../source/developer_guide/contribution/index.md:94 msgid "`[Doc]` for documentation fixes and improvements." msgstr "`[Doc]` 用于文档修复和改进。" -#: ../../developer_guide/contribution/index.md:93 +#: ../../source/developer_guide/contribution/index.md:95 msgid "`[Test]` for tests (such as unit tests)." -msgstr "`[Test]` 用于测试(如单元测试)。" +msgstr "`[Test]` 用于测试(例如单元测试)。" -#: ../../developer_guide/contribution/index.md:94 +#: ../../source/developer_guide/contribution/index.md:96 msgid "`[CI]` for build or continuous integration improvements." msgstr "`[CI]` 用于构建或持续集成的改进。" -#: ../../developer_guide/contribution/index.md:95 +#: ../../source/developer_guide/contribution/index.md:97 msgid "" "`[Misc]` for PRs that do not fit the above categories. Please use this " "sparingly." -msgstr "对于不属于上述类别的 PR,请使用 `[Misc]`。请谨慎使用此标签。" +msgstr "`[Misc]` 用于不属于上述类别的 PR。请谨慎使用此标签。" -#: ../../developer_guide/contribution/index.md:98 +#: ../../source/developer_guide/contribution/index.md:100 msgid "" "If the PR spans more than one category, please include all relevant " "prefixes." 
-msgstr "如果拉取请求(PR)涵盖多个类别,请包含所有相关的前缀。" +msgstr "如果 PR 涉及多个类别,请包含所有相关的前缀。" -#: ../../developer_guide/contribution/index.md:101 +#: ../../source/developer_guide/contribution/index.md:103 msgid "Others" msgstr "其他" -#: ../../developer_guide/contribution/index.md:103 +#: ../../source/developer_guide/contribution/index.md:105 msgid "" "You may find more information about contributing to vLLM Ascend backend " "plugin on " -"[docs.vllm.ai](https://docs.vllm.ai/en/latest/contributing/overview.html)." -" If you find any problem when contributing, you can feel free to submit a PR" -" to improve the doc to help other developers." -msgstr "" -"你可以在 " -"[docs.vllm.ai](https://docs.vllm.ai/en/latest/contributing/overview.html)" -" 上找到有关为 vLLM Ascend 后端插件做贡献的更多信息。如果你在贡献过程中遇到任何问题,欢迎随时提交 PR 来改进文档,以帮助其他开发者。" +"[docs.vllm.ai](https://docs.vllm.ai/en/latest/contributing). If " +"you encounter any problems while contributing, feel free to submit a PR " +"to improve the documentation to help other developers." +msgstr "你可以在 [docs.vllm.ai](https://docs.vllm.ai/en/latest/contributing) 上找到有关为 vLLM Ascend 后端插件做贡献的更多信息。如果在贡献过程中遇到任何问题,欢迎随时提交 PR 来改进文档,以帮助其他开发者。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po new file mode 100644 index 00000000..4f2f737a --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po @@ -0,0 +1,222 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/contribution/multi_node_test.md:1 +msgid "Multi Node Test" +msgstr "多节点测试" + +#: ../../source/developer_guide/contribution/multi_node_test.md:3 +msgid "" +"Multi-Node CI is designed to test distributed scenarios of very large " +"models, eg: disaggregated_prefill multi DP across multi nodes and so on." +msgstr "多节点CI旨在测试超大规模模型的分布式场景,例如:跨多节点的解耦预填充(disaggregated_prefill)、多数据并行(multi DP)等。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:5 +msgid "How it works" +msgstr "工作原理" + +#: ../../source/developer_guide/contribution/multi_node_test.md:7 +msgid "" +"The following picture shows the basic deployment view of the multi-node " +"CI mechanism. It shows how the GitHub action interacts with " +"[lws](https://lws.sigs.k8s.io/docs/overview/) (a kind of kubernetes crd " +"resource)." 
+msgstr "下图展示了多节点CI机制的基本部署视图。它说明了GitHub Action如何与[lws](https://lws.sigs.k8s.io/docs/overview/)(一种Kubernetes CRD资源)进行交互。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:9 +msgid "![alt text](../../assets/deployment.png)" +msgstr "![替代文本](../../assets/deployment.png)" + +#: ../../source/developer_guide/contribution/multi_node_test.md:9 +#: ../../source/developer_guide/contribution/multi_node_test.md:13 +msgid "alt text" +msgstr "替代文本" + +#: ../../source/developer_guide/contribution/multi_node_test.md:11 +msgid "" +"From the workflow perspective, we can see how the final test script is " +"executed, The key point is that these two [lws.yaml and " +"run.sh](https://github.com/vllm-project/vllm-" +"ascend/tree/main/tests/e2e/nightly/multi_node/scripts), The former " +"defines how our k8s cluster is pulled up, and the latter defines the " +"entry script when the pod is started, Each node executes different logic " +"according to the " +"[LWS_WORKER_INDEX](https://lws.sigs.k8s.io/docs/reference/labels-" +"annotations-and-environment-variables/) environment variable, so that " +"multiple nodes can form a distributed cluster to perform tasks." +msgstr "从工作流的角度,我们可以看到最终的测试脚本是如何执行的。关键在于这两个文件:[lws.yaml和run.sh](https://github.com/vllm-project/vllm-ascend/tree/main/tests/e2e/nightly/multi_node/scripts)。前者定义了我们的k8s集群如何被拉起,后者定义了Pod启动时的入口脚本。每个节点根据[LWS_WORKER_INDEX](https://lws.sigs.k8s.io/docs/reference/labels-annotations-and-environment-variables/)环境变量执行不同的逻辑,从而使多个节点能够组成一个分布式集群来执行任务。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:13 +msgid "![alt text](../../assets/workflow.png)" +msgstr "![替代文本](../../assets/workflow.png)" + +#: ../../source/developer_guide/contribution/multi_node_test.md:15 +msgid "How to contribute" +msgstr "如何贡献" + +#: ../../source/developer_guide/contribution/multi_node_test.md:17 +msgid "Upload custom weights" +msgstr "上传自定义权重" + +#: ../../source/developer_guide/contribution/multi_node_test.md:19 +msgid "" +"If you need customized weights, for example, you quantized a w8a8 weight " +"for DeepSeek-V3 and you want your weight to run on CI, uploading weights " +"to ModelScope's [vllm-ascend](https://www.modelscope.cn/organization" +"/vllm-ascend) organization is welcome. If you do not have permission to " +"upload, please contact @Potabk" +msgstr "如果您需要自定义权重,例如,您为DeepSeek-V3量化了一个w8a8权重,并希望您的权重能在CI上运行,欢迎将权重上传至ModelScope的[vllm-ascend](https://www.modelscope.cn/organization/vllm-ascend)组织。如果您没有上传权限,请联系@Potabk。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:21 +msgid "Add config yaml" +msgstr "添加配置YAML" + +#: ../../source/developer_guide/contribution/multi_node_test.md:23 +msgid "" +"As the entrypoint script [run.sh](https://github.com/vllm-project/vllm-" +"ascend/blob/0bf3f21a987aede366ec4629ad0ffec8e32fe90d/tests/e2e/nightly/multi_node/scripts/run.sh#L106)" +" shows, a k8s pod startup means traversing all *.yaml files in the " +"[directory](https://github.com/vllm-project/vllm-" +"ascend/tree/main/tests/e2e/nightly/multi_node/config/), reading and " +"executing according to different configurations, so what we need to do is" +" just add \"yamls\" like [DeepSeek-V3.yaml](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml)." 
+msgstr "如入口脚本[run.sh](https://github.com/vllm-project/vllm-ascend/blob/0bf3f21a987aede366ec4629ad0ffec8e32fe90d/tests/e2e/nightly/multi_node/scripts/run.sh#L106)所示,一个k8s Pod的启动意味着遍历[目录](https://github.com/vllm-project/vllm-ascend/tree/main/tests/e2e/nightly/multi_node/config/)中的所有*.yaml文件,并根据不同的配置读取和执行。因此,我们需要做的就是添加类似[DeepSeek-V3.yaml](https://github.com/vllm-project/vllm-ascend/blob/main/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml)的\"yaml\"文件。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:25 +msgid "" +"Suppose you have **2 nodes** running a 1P1D setup (1 Prefillers + 1 " +"Decoder):" +msgstr "假设您有**2个节点**运行1P1D设置(1个预填充器 + 1个解码器):" + +#: ../../source/developer_guide/contribution/multi_node_test.md:27 +msgid "you may add a config file looks like:" +msgstr "您可以添加一个类似这样的配置文件:" + +#: ../../source/developer_guide/contribution/multi_node_test.md:73 +msgid "Add the case to nightly workflow" +msgstr "将用例添加到夜间工作流" + +#: ../../source/developer_guide/contribution/multi_node_test.md:75 +msgid "" +"Currently, the multi-node test workflow is defined in the " +"[nightly_test_a3.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/main/.github/workflows/schedule_nightly_test_a3.yaml)" +msgstr "目前,多节点测试工作流定义在[nightly_test_a3.yaml](https://github.com/vllm-project/vllm-ascend/blob/main/.github/workflows/schedule_nightly_test_a3.yaml)中。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:110 +msgid "" +"The matrix above defines all the parameters required to add a multi-" +"machine use case. The parameters worth noting (if you are adding a new " +"use case) are `size` and the path to the yaml configuration file. The " +"former defines the number of nodes required for your use case, and the " +"latter defines the path to the configuration file you have completed in " +"step 2." +msgstr "上面的矩阵定义了添加一个多机用例所需的所有参数。值得注意的参数(如果您正在添加一个新用例)是`size`和yaml配置文件的路径。前者定义了您的用例所需的节点数量,后者定义了您在步骤2中完成的配置文件的路径。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:112 +msgid "Run Multi-Node tests locally" +msgstr "本地运行多节点测试" + +#: ../../source/developer_guide/contribution/multi_node_test.md:114 +msgid "1. Use kubernetes" +msgstr "1. 使用Kubernetes" + +#: ../../source/developer_guide/contribution/multi_node_test.md:116 +msgid "" +"This section assumes that you already have a " +"[Kubernetes](https://kubernetes.io/docs/setup/) NPU cluster environment " +"locally. Then you can easily start our test with one click." +msgstr "本节假设您本地已经有一个[Kubernetes](https://kubernetes.io/docs/setup/) NPU集群环境。然后您可以轻松地一键启动我们的测试。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:118 +msgid "Step 1. Install LWS CRD resources" +msgstr "步骤 1. 安装LWS CRD资源" + +#: ../../source/developer_guide/contribution/multi_node_test.md:120 +msgid "" +"See Which can be used as a " +"reference" +msgstr "参考" + +#: ../../source/developer_guide/contribution/multi_node_test.md:122 +msgid "Step 2. Deploy the following yaml file `lws.yaml` as what you want" +msgstr "步骤 2. 
按需部署以下yaml文件`lws.yaml`" + +#: ../../source/developer_guide/contribution/multi_node_test.md:258 +msgid "Verify the status of the pods:" +msgstr "验证Pod的状态:" + +#: ../../source/developer_guide/contribution/multi_node_test.md:264 +msgid "Should get an output similar to this:" +msgstr "应该会得到类似这样的输出:" + +#: ../../source/developer_guide/contribution/multi_node_test.md:272 +msgid "Verify that the distributed inference works:" +msgstr "验证分布式推理是否正常工作:" + +#: ../../source/developer_guide/contribution/multi_node_test.md:278 +msgid "Should get something similar to this:" +msgstr "应该会得到类似这样的结果:" + +#: ../../source/developer_guide/contribution/multi_node_test.md:312 +msgid "2. Test without kubernetes" +msgstr "2. 不使用Kubernetes进行测试" + +#: ../../source/developer_guide/contribution/multi_node_test.md:314 +msgid "" +"Since our script is Kubernetes-friendly, we need to actively pass in some" +" cluster information if you don't have a Kubernetes environment." +msgstr "由于我们的脚本对Kubernetes友好,如果您没有Kubernetes环境,则需要主动传入一些集群信息。" + +#: ../../source/developer_guide/contribution/multi_node_test.md:316 +msgid "Step 1. Add cluster_hosts to config yamls" +msgstr "步骤 1. 向配置YAML文件添加cluster_hosts" + +#: ../../source/developer_guide/contribution/multi_node_test.md:318 +msgid "" +"Modify on every cluster host, commands just like " +"[DeepSeek-V3.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/e760aae1df7814073a4180172385505c1ec0fd83/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml#L25)" +" after the configure item `num_nodes` , for example: `cluster_hosts: " +"[\"xxx.xxx.xxx.188\", \"xxx.xxx.xxx.212\"]`" +msgstr "在每个集群主机上进行修改,就像[DeepSeek-V3.yaml](https://github.com/vllm-project/vllm-ascend/blob/e760aae1df7814073a4180172385505c1ec0fd83/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml#L25)那样,在配置项`num_nodes`之后添加,例如:`cluster_hosts: [\"xxx.xxx.xxx.188\", \"xxx.xxx.xxx.212\"]`" + +#: ../../source/developer_guide/contribution/multi_node_test.md:321 +msgid "Step 2. Install develop environment" +msgstr "步骤 2. 安装开发环境" + +#: ../../source/developer_guide/contribution/multi_node_test.md:322 +msgid "Install vllm-ascend develop packages on every cluster host" +msgstr "在每个集群主机上安装vllm-ascend开发包" + +#: ../../source/developer_guide/contribution/multi_node_test.md:329 +msgid "Install AISBench on the first host(leader node) in cluster_hosts" +msgstr "在cluster_hosts中的第一个主机(主节点)上安装AISBench" + +#: ../../source/developer_guide/contribution/multi_node_test.md:341 +msgid "Step 3. Running test locally" +msgstr "步骤 3. 本地运行测试" + +#: ../../source/developer_guide/contribution/multi_node_test.md:343 +msgid "Run the script on **each node separately**" +msgstr "在**每个节点上分别**运行脚本" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po index be76daa7..a46f5eee 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po @@ -4,234 +4,289 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/contribution/testing.md:1 +#: ../../source/developer_guide/contribution/testing.md:1 msgid "Testing" msgstr "测试" -#: ../../developer_guide/contribution/testing.md:3 +#: ../../source/developer_guide/contribution/testing.md:3 msgid "" -"This secition explains how to write e2e tests and unit tests to verify the " -"implementation of your feature." -msgstr "本节介绍如何编写端到端测试和单元测试,以验证你的功能实现。" +"This document explains how to write E2E tests and unit tests to verify " +"the implementation of your feature." +msgstr "本文档介绍如何编写端到端测试和单元测试,以验证您实现的功能。" -#: ../../developer_guide/contribution/testing.md:5 -msgid "Setup test environment" +#: ../../source/developer_guide/contribution/testing.md:5 +msgid "Set up a test environment" msgstr "设置测试环境" -#: ../../developer_guide/contribution/testing.md:7 +#: ../../source/developer_guide/contribution/testing.md:7 msgid "" -"The fastest way to setup test environment is to use the main branch " +"The fastest way to set up a test environment is to use the main branch's " "container image:" -msgstr "搭建测试环境最快的方法是使用 main 分支的容器镜像:" +msgstr "设置测试环境最快的方法是使用 main 分支的容器镜像:" -#: ../../developer_guide/contribution/testing.md +#: ../../source/developer_guide/contribution/testing.md msgid "Local (CPU)" msgstr "本地(CPU)" -#: ../../developer_guide/contribution/testing.md:18 -msgid "You can run the unit tests on CPU with the following steps:" -msgstr "你可以按照以下步骤在 CPU 上运行单元测试:" +#: ../../source/developer_guide/contribution/testing.md:18 +msgid "You can run the unit tests on CPUs with the following steps:" +msgstr "您可以按照以下步骤在 CPU 上运行单元测试:" -#: ../../developer_guide/contribution/testing.md +#: ../../source/developer_guide/contribution/testing.md msgid "Single card" -msgstr "单张卡片" +msgstr "单卡" -#: ../../developer_guide/contribution/testing.md:85 -#: ../../developer_guide/contribution/testing.md:123 -msgid "" -"After starting the container, you should install the required packages:" -msgstr "启动容器后,你应该安装所需的软件包:" +#: ../../source/developer_guide/contribution/testing.md:96 +#: ../../source/developer_guide/contribution/testing.md:135 +msgid "After starting the container, you should install the required packages:" +msgstr "启动容器后,您应该安装所需的软件包:" -#: ../../developer_guide/contribution/testing.md +#: ../../source/developer_guide/contribution/testing.md msgid "Multi cards" msgstr "多卡" -#: ../../developer_guide/contribution/testing.md:137 +#: ../../source/developer_guide/contribution/testing.md:149 msgid "Running tests" msgstr "运行测试" -#: ../../developer_guide/contribution/testing.md:139 -msgid "Unit test" +#: ../../source/developer_guide/contribution/testing.md:151 +msgid "Unit tests" msgstr "单元测试" -#: ../../developer_guide/contribution/testing.md:141 +#: ../../source/developer_guide/contribution/testing.md:153 msgid "There are several principles to follow when writing unit tests:" -msgstr "编写单元测试时需要遵循几个原则:" +msgstr "编写单元测试时需要遵循以下几个原则:" -#: 
../../developer_guide/contribution/testing.md:143 +#: ../../source/developer_guide/contribution/testing.md:155 msgid "" -"The test file path should be consistent with source file and start with " -"`test_` prefix, such as: `vllm_ascend/worker/worker.py` --> " +"The test file path should be consistent with the source file and start " +"with the `test_` prefix, such as: `vllm_ascend/worker/worker.py` --> " "`tests/ut/worker/test_worker.py`" msgstr "" -"测试文件的路径应与源文件保持一致,并以 `test_` 前缀开头,例如:`vllm_ascend/worker/worker.py` --> " +"测试文件路径应与源文件保持一致,并以 `test_` 前缀开头,例如:`vllm_ascend/worker/worker.py` --> " "`tests/ut/worker/test_worker.py`" -#: ../../developer_guide/contribution/testing.md:144 +#: ../../source/developer_guide/contribution/testing.md:156 msgid "" -"The vLLM Ascend test are using unittest framework, see " -"[here](https://docs.python.org/3/library/unittest.html#module-unittest) to " -"understand how to write unit tests." +"The vLLM Ascend test uses unittest framework. See [the Python unittest " +"documentation](https://docs.python.org/3/library/unittest.html#module-" +"unittest) to understand how to write unit tests." msgstr "" -"vLLM Ascend 测试使用 unittest " -"框架,参见[这里](https://docs.python.org/3/library/unittest.html#module-" -"unittest)了解如何编写单元测试。" +"vLLM Ascend 测试使用 unittest 框架。请参阅 [Python unittest " +"文档](https://docs.python.org/3/library/unittest.html#module-unittest) 以了解如何编写单元测试。" -#: ../../developer_guide/contribution/testing.md:145 +#: ../../source/developer_guide/contribution/testing.md:157 msgid "" -"All unit tests can be run on CPU, so you must mock the device-related " -"function to host." -msgstr "所有单元测试都可以在 CPU 上运行,因此你必须将与设备相关的函数模拟为 host。" +"All unit tests can be run on CPUs, so you must mock the device-related " +"functions on the host." +msgstr "所有单元测试都可以在 CPU 上运行,因此您必须在主机上模拟与设备相关的函数。" -#: ../../developer_guide/contribution/testing.md:146 +#: ../../source/developer_guide/contribution/testing.md:158 msgid "" -"Example: [tests/ut/test_ascend_config.py](https://github.com/vllm-" -"project/vllm-ascend/blob/main/tests/ut/test_ascend_config.py)." +"Example: [tests/ut/test_ascend_config.py](https://github.com/vllm-project" +"/vllm-ascend/blob/main/tests/ut/test_ascend_config.py)." msgstr "" "示例:[tests/ut/test_ascend_config.py](https://github.com/vllm-project/vllm-" "ascend/blob/main/tests/ut/test_ascend_config.py)。" -#: ../../developer_guide/contribution/testing.md:147 +#: ../../source/developer_guide/contribution/testing.md:159 msgid "You can run the unit tests using `pytest`:" -msgstr "你可以使用 `pytest` 运行单元测试:" +msgstr "您可以使用 `pytest` 运行单元测试:" -#: ../../developer_guide/contribution/testing.md -msgid "Multi cards test" -msgstr "多卡测试" +#: ../../source/developer_guide/contribution/testing.md +msgid "Single-card" +msgstr "单卡" -#: ../../developer_guide/contribution/testing.md:192 +#: ../../source/developer_guide/contribution/testing.md +msgid "Multi-card" +msgstr "多卡" + +#: ../../source/developer_guide/contribution/testing.md:206 msgid "E2E test" msgstr "端到端测试" -#: ../../developer_guide/contribution/testing.md:194 +#: ../../source/developer_guide/contribution/testing.md:208 msgid "" -"Although vllm-ascend CI provide [e2e test](https://github.com/vllm-" -"project/vllm-ascend/blob/main/.github/workflows/vllm_ascend_test.yaml) on " -"Ascend CI, you can run it locally." 
+"Although vllm-ascend CI provides E2E tests on Ascend CI (for example, " +"[schedule_nightly_test_a2.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/main/.github/workflows/schedule_nightly_test_a2.yaml), " +"[schedule_nightly_test_a3.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/main/.github/workflows/schedule_nightly_test_a3.yaml), " +"[pr_test_full.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/main/.github/workflows/pr_test_full.yaml)), you can run them " +"locally." msgstr "" -"虽然 vllm-ascend CI 在 Ascend CI 上提供了 [端到端测试](https://github.com/vllm-" -"project/vllm-" -"ascend/blob/main/.github/workflows/vllm_ascend_test.yaml),你也可以在本地运行它。" +"虽然 vllm-ascend CI 在 Ascend CI 上提供了端到端测试(例如,[schedule_nightly_test_a2.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/main/.github/workflows/schedule_nightly_test_a2.yaml)、[schedule_nightly_test_a3.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/main/.github/workflows/schedule_nightly_test_a3.yaml)、[pr_test_full.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/main/.github/workflows/pr_test_full.yaml)),但您也可以在本地运行它们。" -#: ../../developer_guide/contribution/testing.md:204 -msgid "You can't run e2e test on CPU." -msgstr "你无法在 CPU 上运行 e2e 测试。" +#: ../../source/developer_guide/contribution/testing.md:218 +msgid "You can't run the E2E test on CPUs." +msgstr "您无法在 CPU 上运行端到端测试。" -#: ../../developer_guide/contribution/testing.md:240 +#: ../../source/developer_guide/contribution/testing.md:257 msgid "" -"This will reproduce e2e test: " +"This will reproduce the E2E test. See " "[vllm_ascend_test.yaml](https://github.com/vllm-project/vllm-" "ascend/blob/main/.github/workflows/vllm_ascend_test.yaml)." msgstr "" -"这将复现端到端测试:[vllm_ascend_test.yaml](https://github.com/vllm-project/vllm-" +"这将复现端到端测试。请参阅 [vllm_ascend_test.yaml](https://github.com/vllm-project/vllm-" "ascend/blob/main/.github/workflows/vllm_ascend_test.yaml)。" -#: ../../developer_guide/contribution/testing.md:242 -msgid "E2E test example:" -msgstr "E2E 测试示例:" +#: ../../source/developer_guide/contribution/testing.md:259 +msgid "" +"For running nightly multi-node test cases locally, refer to the `Running " +"Locally` section in [Multi Node Test](./multi_node_test.md)." 
+msgstr "要在本地运行夜间多节点测试用例,请参阅 [多节点测试](./multi_node_test.md) 中的 `本地运行` 部分。" -#: ../../developer_guide/contribution/testing.md:244 +#: ../../source/developer_guide/contribution/testing.md:261 +msgid "E2E test example" +msgstr "端到端测试示例" + +#: ../../source/developer_guide/contribution/testing.md:263 msgid "" "Offline test example: " -"[`tests/e2e/singlecard/test_offline_inference.py`](https://github.com/vllm-" -"project/vllm-" +"[`tests/e2e/singlecard/test_offline_inference.py`](https://github.com" +"/vllm-project/vllm-" "ascend/blob/main/tests/e2e/singlecard/test_offline_inference.py)" msgstr "" -"离线测试示例:[`tests/e2e/singlecard/test_offline_inference.py`](https://github.com/vllm-" -"project/vllm-" +"离线测试示例:[`tests/e2e/singlecard/test_offline_inference.py`](https://github.com" +"/vllm-project/vllm-" "ascend/blob/main/tests/e2e/singlecard/test_offline_inference.py)" -#: ../../developer_guide/contribution/testing.md:245 +#: ../../source/developer_guide/contribution/testing.md:264 msgid "" "Online test examples: " -"[`tests/e2e/singlecard/test_prompt_embedding.py`](https://github.com/vllm-" -"project/vllm-ascend/blob/main/tests/e2e/singlecard/test_prompt_embedding.py)" +"[`tests/e2e/singlecard/test_prompt_embedding.py`](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/tests/e2e/singlecard/test_prompt_embedding.py)" msgstr "" -"在线测试示例:[`tests/e2e/singlecard/test_prompt_embedding.py`](https://github.com/vllm-" -"project/vllm-ascend/blob/main/tests/e2e/singlecard/test_prompt_embedding.py)" +"在线测试示例:[`tests/e2e/singlecard/test_prompt_embedding.py`](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/tests/e2e/singlecard/test_prompt_embedding.py)" -#: ../../developer_guide/contribution/testing.md:246 +#: ../../source/developer_guide/contribution/testing.md:265 msgid "" "Correctness test example: " -"[`tests/e2e/singlecard/test_aclgraph_accuracy.py`](https://github.com/vllm-" -"project/vllm-ascend/blob/main/tests/e2e/singlecard/test_aclgraph_accuracy.py)" +"[`tests/e2e/singlecard/test_aclgraph_accuracy.py`](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/tests/e2e/singlecard/test_aclgraph_accuracy.py)" msgstr "" -"正确性测试示例:[`tests/e2e/singlecard/test_aclgraph_accuracy.py`](https://github.com/vllm-" -"project/vllm-ascend/blob/main/tests/e2e/singlecard/test_aclgraph_accuracy.py)" +"正确性测试示例:[`tests/e2e/singlecard/test_aclgraph_accuracy.py`](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/tests/e2e/singlecard/test_aclgraph_accuracy.py)" -#: ../../developer_guide/contribution/testing.md:247 +#: ../../source/developer_guide/contribution/testing.md:267 msgid "" -"Reduced Layer model test example: [test_torchair_graph_mode.py - " -"DeepSeek-V3-Pruning](https://github.com/vllm-project/vllm-" -"ascend/blob/20767a043cccb3764214930d4695e53941de87ec/tests/e2e/multicard/test_torchair_graph_mode.py#L48)" -msgstr "" -"简化层模型测试示例:[test_torchair_graph_mode.py - " -"DeepSeek-V3-Pruning](https://github.com/vllm-project/vllm-" -"ascend/blob/20767a043cccb3764214930d4695e53941de87ec/tests/e2e/multicard/test_torchair_graph_mode.py#L48)" +"The CI resource is limited, and you might need to reduce the number of " +"layers of a model. 
Below is an example of how to generate a reduced layer" +" model:" +msgstr "CI 资源有限,您可能需要减少模型的层数。以下是如何生成缩减层数模型的示例:" -#: ../../developer_guide/contribution/testing.md:249 +#: ../../source/developer_guide/contribution/testing.md:268 msgid "" -"The CI resource is limited, you might need to reduce layer number of the " -"model, below is an example of how to generate a reduced layer model:" -msgstr "CI 资源有限,您可能需要减少模型的层数,下面是一个生成减少层数模型的示例:" +"Fork the original model repo in modelscope. All the files in the repo " +"except for weights are required." +msgstr "在 ModelScope 中 Fork 原始模型仓库。需要仓库中除权重文件外的所有文件。" -#: ../../developer_guide/contribution/testing.md:250 -msgid "" -"Fork the original model repo in modelscope, we need all the files in the " -"repo except for weights." -msgstr "在 modelscope 中 fork 原始模型仓库,我们需要仓库中的所有文件,除了权重文件。" - -#: ../../developer_guide/contribution/testing.md:251 +#: ../../source/developer_guide/contribution/testing.md:269 #, python-brace-format msgid "" "Set `num_hidden_layers` to the expected number of layers, e.g., " "`{\"num_hidden_layers\": 2,}`" msgstr "将 `num_hidden_layers` 设置为期望的层数,例如 `{\"num_hidden_layers\": 2,}`" -#: ../../developer_guide/contribution/testing.md:252 +#: ../../source/developer_guide/contribution/testing.md:270 msgid "" "Copy the following python script as `generate_random_weight.py`. Set the " -"relevant parameters `MODEL_LOCAL_PATH`, `DIST_DTYPE` and `DIST_MODEL_PATH` " -"as needed:" +"relevant parameters `MODEL_LOCAL_PATH`, `DIST_DTYPE` and " +"`DIST_MODEL_PATH` as needed:" msgstr "" -"将以下 Python 脚本复制为 `generate_random_weight.py`。根据需要设置相关参数 " -"`MODEL_LOCAL_PATH`、`DIST_DTYPE` 和 `DIST_MODEL_PATH`:" +"将以下 Python 脚本复制为 `generate_random_weight.py`。根据需要设置相关参数 `MODEL_LOCAL_PATH`、`DIST_DTYPE` 和 `DIST_MODEL_PATH`:" -#: ../../developer_guide/contribution/testing.md:270 +#: ../../source/developer_guide/contribution/testing.md:288 +msgid "View CI log summary in GitHub Actions" +msgstr "在 GitHub Actions 中查看 CI 日志摘要" + +#: ../../source/developer_guide/contribution/testing.md:290 +msgid "" +"After a CI job finishes, you can open the corresponding GitHub Actions " +"job page and check the `Summary` tab to view the generated CI log " +"summary." +msgstr "CI 作业完成后,您可以打开相应的 GitHub Actions 作业页面,并查看 `Summary` 选项卡以查看生成的 CI 日志摘要。" + +#: ../../source/developer_guide/contribution/testing.md:293 +msgid "![GitHub Actions CI log summary](../../assets/ci_log_summary.png)" +msgstr "![GitHub Actions CI 日志摘要](../../assets/ci_log_summary.png)" + +#: ../../source/developer_guide/contribution/testing.md:293 +msgid "GitHub Actions CI log summary" +msgstr "GitHub Actions CI 日志摘要" + +#: ../../source/developer_guide/contribution/testing.md:295 +msgid "" +"The summary is intended to help developers triage failures more quickly. " +"It may include:" +msgstr "该摘要旨在帮助开发者更快地排查故障。它可能包括:" + +#: ../../source/developer_guide/contribution/testing.md:297 +msgid "failed test files" +msgstr "失败的测试文件" + +#: ../../source/developer_guide/contribution/testing.md:298 +msgid "failed test cases" +msgstr "失败的测试用例" + +#: ../../source/developer_guide/contribution/testing.md:299 +msgid "distinct root-cause errors" +msgstr "不同的根本原因错误" + +#: ../../source/developer_guide/contribution/testing.md:300 +msgid "short error context extracted from the job log" +msgstr "从作业日志中提取的简短错误上下文" + +#: ../../source/developer_guide/contribution/testing.md:302 +msgid "" +"This summary is generated from the job log by " +"`/.github/workflows/scripts/ci_log_summary_v2.py` for unit-test and e2e " +"workflows." 
+msgstr "该摘要是由 `/.github/workflows/scripts/ci_log_summary_v2.py` 从作业日志中为单元测试和端到端测试工作流生成的。" + +#: ../../source/developer_guide/contribution/testing.md:305 msgid "Run doctest" msgstr "运行 doctest" -#: ../../developer_guide/contribution/testing.md:272 +#: ../../source/developer_guide/contribution/testing.md:307 msgid "" -"vllm-ascend provides a `vllm-ascend/tests/e2e/run_doctests.sh` command to " -"run all doctests in the doc files. The doctest is a good way to make sure " -"the docs are up to date and the examples are executable, you can run it " +"vllm-ascend provides a `vllm-ascend/tests/e2e/run_doctests.sh` command to" +" run all doctests in the doc files. The doctest is a good way to make " +"sure docs stay current and examples remain executable, which can be run " "locally as follows:" msgstr "" -"vllm-ascend 提供了一个 `vllm-ascend/tests/e2e/run_doctests.sh` 命令,用于运行文档文件中的所有 " -"doctest。doctest 是确保文档保持最新且示例可执行的好方法,你可以按照以下方式在本地运行它:" +"vllm-ascend 提供了一个 `vllm-ascend/tests/e2e/run_doctests.sh` 命令来运行文档文件中的所有 doctest。doctest " +"是确保文档保持最新且示例保持可执行性的好方法,可以按如下方式在本地运行:" -#: ../../developer_guide/contribution/testing.md:280 +#: ../../source/developer_guide/contribution/testing.md:315 msgid "" -"This will reproduce the same environment as the CI: " +"This will reproduce the same environment as the CI. See " "[vllm_ascend_doctest.yaml](https://github.com/vllm-project/vllm-" "ascend/blob/main/.github/workflows/vllm_ascend_doctest.yaml)." msgstr "" -"这将复现与 CI 相同的环境:[vllm_ascend_doctest.yaml](https://github.com/vllm-" -"project/vllm-ascend/blob/main/.github/workflows/vllm_ascend_doctest.yaml)。" +"这将复现与 CI 相同的环境。请参阅 [vllm_ascend_doctest.yaml](https://github.com/vllm-project/vllm-" +"ascend/blob/main/.github/workflows/vllm_ascend_doctest.yaml)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po new file mode 100644 index 00000000..417db95d --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po @@ -0,0 +1,238 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:1 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:3 +msgid "" +"This document guides you to conduct accuracy testing using " +"[AISBench](https://gitee.com/aisbench/benchmark/tree/master). AISBench " +"provides accuracy and performance evaluation for many datasets." +msgstr "本文档指导您如何使用 [AISBench](https://gitee.com/aisbench/benchmark/tree/master) 进行精度测试。AISBench 为许多数据集提供了精度和性能评估。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:5 +msgid "Online Server" +msgstr "在线服务器" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:7 +msgid "1. Start the vLLM server" +msgstr "1. 
启动 vLLM 服务器" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:9 +msgid "You can run docker container to start the vLLM server on a single NPU:" +msgstr "您可以运行 docker 容器在单个 NPU 上启动 vLLM 服务器:" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:37 +msgid "Run the vLLM server in the docker." +msgstr "在 docker 中运行 vLLM 服务器。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:45 +msgid "" +"`--max_model_len` should be greater than `35000`, this will be suitable " +"for most datasets. Otherwise the accuracy evaluation may be affected." +msgstr "`--max_model_len` 应大于 `35000`,这适用于大多数数据集。否则可能会影响精度评估。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:48 +msgid "The vLLM server is started successfully, if you see logs as below:" +msgstr "如果看到如下日志,则 vLLM 服务器启动成功:" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:56 +msgid "2. Run different datasets using AISBench" +msgstr "2. 使用 AISBench 运行不同数据集" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:58 +msgid "Install AISBench" +msgstr "安装 AISBench" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:60 +msgid "" +"Refer to [AISBench](https://gitee.com/aisbench/benchmark/tree/master) for" +" details. Install AISBench from source." +msgstr "详情请参考 [AISBench](https://gitee.com/aisbench/benchmark/tree/master)。从源码安装 AISBench。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:69 +msgid "Install extra AISBench dependencies." +msgstr "安装额外的 AISBench 依赖项。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:76 +msgid "Run `ais_bench -h` to check the installation." +msgstr "运行 `ais_bench -h` 以检查安装。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:78 +msgid "Download Dataset" +msgstr "下载数据集" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:80 +msgid "You can choose one or multiple datasets to execute accuracy evaluation." +msgstr "您可以选择一个或多个数据集来执行精度评估。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:82 +msgid "`C-Eval` dataset." +msgstr "`C-Eval` 数据集。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:84 +msgid "" +"Take `C-Eval` dataset as an example. You can refer to " +"[Datasets](https://gitee.com/aisbench/benchmark/tree/master/ais_bench/benchmark/configs/datasets)" +" for more datasets. Each dataset has a `README.md` with detailed download" +" and installation instructions." +msgstr "以 `C-Eval` 数据集为例。更多数据集请参考 [Datasets](https://gitee.com/aisbench/benchmark/tree/master/ais_bench/benchmark/configs/datasets)。每个数据集都有一个 `README.md` 文件,包含详细的下载和安装说明。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:86 +msgid "Download dataset and install it to specific path." +msgstr "下载数据集并安装到指定路径。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:98 +msgid "`MMLU` dataset." +msgstr "`MMLU` 数据集。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:107 +msgid "`GPQA` dataset." +msgstr "`GPQA` 数据集。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:116 +msgid "`MATH` dataset." +msgstr "`MATH` 数据集。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:125 +msgid "`LiveCodeBench` dataset." +msgstr "`LiveCodeBench` 数据集。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:133 +msgid "`AIME 2024` dataset." +msgstr "`AIME 2024` 数据集。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:144 +msgid "`GSM8K` dataset." 
+msgstr "`GSM8K` 数据集。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:153 +msgid "Configuration" +msgstr "配置" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:155 +msgid "" +"Update the file " +"`benchmark/ais_bench/benchmark/configs/models/vllm_api/vllm_api_general_chat.py`." +" There are several arguments that you should update according to your " +"environment." +msgstr "更新文件 `benchmark/ais_bench/benchmark/configs/models/vllm_api/vllm_api_general_chat.py`。有几个参数需要根据您的环境进行更新。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:158 +msgid "" +"`attr`: Identifier for the inference backend type, fixed as `service` " +"(serving-based inference) or `local` (local model)." +msgstr "`attr`:推理后端类型的标识符,固定为 `service`(基于服务的推理)或 `local`(本地模型)。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:159 +msgid "`type`: Used to select different backend API types." +msgstr "`type`:用于选择不同的后端 API 类型。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:160 +msgid "" +"`abbr`: Unique identifier for a local task, used to distinguish between " +"multiple tasks." +msgstr "`abbr`:本地任务的唯一标识符,用于区分多个任务。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:161 +msgid "`path`: Update to your model weight path." +msgstr "`path`:更新为您的模型权重路径。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:162 +msgid "`model`: Update to your model name in vLLM." +msgstr "`model`:更新为您的 vLLM 中的模型名称。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:163 +msgid "`host_ip` and `host_port`: Update to your vLLM server ip and port." +msgstr "`host_ip` 和 `host_port`:更新为您的 vLLM 服务器的 IP 和端口。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:164 +msgid "" +"`max_out_len`: Note `max_out_len` + LLM input length should be less than " +"`max-model-len`(config in your vllm server), `32768` will be suitable for" +" most datasets." +msgstr "`max_out_len`:注意 `max_out_len` + LLM 输入长度应小于 `max-model-len`(在您的 vllm 服务器中配置),`32768` 适用于大多数数据集。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:165 +msgid "`batch_size`: Update according to your dataset." +msgstr "`batch_size`:根据您的数据集进行更新。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:166 +msgid "`temperature`: Update inference argument." +msgstr "`temperature`:更新推理参数。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:199 +msgid "Execute Accuracy Evaluation" +msgstr "执行精度评估" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:201 +msgid "Run the following code to execute different accuracy evaluation." 
+msgstr "运行以下代码以执行不同的精度评估。" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:224 +msgid "" +"After each dataset execution, you can get the result from saved files " +"such as `outputs/default/20250628_151326`, there is an example as " +"follows:" +msgstr "每个数据集执行后,您可以从保存的文件(例如 `outputs/default/20250628_151326`)中获取结果,示例如下:" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:249 +msgid "Execute Performance Evaluation" +msgstr "执行性能评估" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:251 +msgid "Text-only benchmarks:" +msgstr "纯文本基准测试:" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:273 +msgid "Multi-modal benchmarks (text + images):" +msgstr "多模态基准测试(文本 + 图像):" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:280 +msgid "" +"After execution, you can get the result from saved files, there is an " +"example as follows:" +msgstr "执行后,您可以从保存的文件中获取结果,示例如下:" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:300 +msgid "3. Troubleshooting" +msgstr "3. 故障排除" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:302 +msgid "Invalid Image Path Error" +msgstr "无效图像路径错误" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:304 +msgid "If you download the TextVQA dataset following the AISBench documentation:" +msgstr "如果您按照 AISBench 文档下载 TextVQA 数据集:" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:316 +msgid "you may encounter the following error:" +msgstr "您可能会遇到以下错误:" + +#: ../../source/developer_guide/evaluation/using_ais_bench.md:322 +msgid "" +"You need to manually replace the dataset image paths with absolute paths," +" changing `/path/to/benchmark/ais_bench/datasets/textvqa/train_images/` " +"to the actual absolute directory where the images are stored:" +msgstr "您需要手动将数据集图像路径替换为绝对路径,将 `/path/to/benchmark/ais_bench/datasets/textvqa/train_images/` 更改为图像存储的实际绝对目录:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po index 60ecb041..df91a7a1 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po @@ -4,109 +4,111 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/evaluation/using_evalscope.md:1 +#: ../../source/developer_guide/evaluation/using_evalscope.md:1 msgid "Using EvalScope" msgstr "使用 EvalScope" -#: ../../developer_guide/evaluation/using_evalscope.md:3 +#: ../../source/developer_guide/evaluation/using_evalscope.md:3 msgid "" -"This document will guide you have model inference stress testing and " -"accuracy testing using [EvalScope](https://github.com/modelscope/evalscope)." 
+"This document will guide you through model inference stress testing and " +"accuracy testing using " +"[EvalScope](https://github.com/modelscope/evalscope)." msgstr "" "本文档将指导您如何使用 [EvalScope](https://github.com/modelscope/evalscope) " "进行模型推理压力测试和精度测试。" -#: ../../developer_guide/evaluation/using_evalscope.md:5 -msgid "1. Online serving" -msgstr "1. 在线服务" +#: ../../source/developer_guide/evaluation/using_evalscope.md:5 +msgid "1. Online server" +msgstr "1. 在线服务器" -#: ../../developer_guide/evaluation/using_evalscope.md:7 +#: ../../source/developer_guide/evaluation/using_evalscope.md:7 msgid "You can run docker container to start the vLLM server on a single NPU:" -msgstr "你可以运行 docker 容器,在单个 NPU 上启动 vLLM 服务器:" +msgstr "你可以运行 Docker 容器,在单个 NPU 上启动 vLLM 服务器:" -#: ../../developer_guide/evaluation/using_evalscope.md:34 -msgid "If your service start successfully, you can see the info shown below:" -msgstr "如果你的服务启动成功,你会看到如下所示的信息:" - -#: ../../developer_guide/evaluation/using_evalscope.md:42 +#: ../../source/developer_guide/evaluation/using_evalscope.md:35 msgid "" -"Once your server is started, you can query the model with input prompts in " -"new terminal:" -msgstr "一旦你的服务器启动后,你可以在新的终端中用输入提示词查询模型:" +"If the vLLM server is started successfully, you can see information shown" +" below:" +msgstr "如果 vLLM 服务器启动成功,你将看到如下所示的信息:" -#: ../../developer_guide/evaluation/using_evalscope.md:55 +#: ../../source/developer_guide/evaluation/using_evalscope.md:43 +msgid "" +"Once your server is started, you can query the model with input prompts " +"in a new terminal:" +msgstr "服务器启动后,你可以在新的终端中使用输入提示词查询模型:" + +#: ../../source/developer_guide/evaluation/using_evalscope.md:56 msgid "2. Install EvalScope using pip" msgstr "2. 使用 pip 安装 EvalScope" -#: ../../developer_guide/evaluation/using_evalscope.md:57 -msgid "You can install EvalScope by using:" -msgstr "你可以使用以下方式安装 EvalScope:" +#: ../../source/developer_guide/evaluation/using_evalscope.md:58 +msgid "You can install EvalScope as follows:" +msgstr "你可以通过以下方式安装 EvalScope:" -#: ../../developer_guide/evaluation/using_evalscope.md:65 -msgid "3. Run gsm8k accuracy test using EvalScope" -msgstr "3. 使用 EvalScope 运行 gsm8k 准确率测试" +#: ../../source/developer_guide/evaluation/using_evalscope.md:66 +msgid "3. Run GSM8K using EvalScope for accuracy testing" +msgstr "3. 使用 EvalScope 运行 GSM8K 进行精度测试" -#: ../../developer_guide/evaluation/using_evalscope.md:67 -msgid "You can `evalscope eval` run gsm8k accuracy test:" -msgstr "你可以使用 `evalscope eval` 运行 gsm8k 准确率测试:" +#: ../../source/developer_guide/evaluation/using_evalscope.md:68 +msgid "You can use `evalscope eval` to run GSM8K for accuracy testing:" +msgstr "你可以使用 `evalscope eval` 运行 GSM8K 进行精度测试:" -#: ../../developer_guide/evaluation/using_evalscope.md:78 -#: ../../developer_guide/evaluation/using_evalscope.md:114 -msgid "After 1-2 mins, the output is as shown below:" -msgstr "1-2 分钟后,输出如下所示:" +#: ../../source/developer_guide/evaluation/using_evalscope.md:80 +#: ../../source/developer_guide/evaluation/using_evalscope.md:117 +msgid "After 1 to 2 minutes, the output is shown below:" +msgstr "1 到 2 分钟后,输出结果如下所示:" -#: ../../developer_guide/evaluation/using_evalscope.md:88 +#: ../../source/developer_guide/evaluation/using_evalscope.md:90 msgid "" -"See more detail in: [EvalScope doc - Model API Service " -"Evaluation](https://evalscope.readthedocs.io/en/latest/get_started/basic_usage.html#model-" -"api-service-evaluation)." 
+"See more details in [EvalScope doc - Model API Service " +"Evaluation](https://evalscope.readthedocs.io/en/latest/get_started/basic_usage.html" +"#model-api-service-evaluation)." msgstr "" -"更多详情请见:[EvalScope 文档 - 模型 API " -"服务评测](https://evalscope.readthedocs.io/en/latest/get_started/basic_usage.html#model-" -"api-service-evaluation)。" +"更多详情请参阅 [EvalScope 文档 - 模型 API " +"服务评估](https://evalscope.readthedocs.io/en/latest/get_started/basic_usage.html" +"#model-api-service-evaluation)。" -#: ../../developer_guide/evaluation/using_evalscope.md:90 +#: ../../source/developer_guide/evaluation/using_evalscope.md:92 msgid "4. Run model inference stress testing using EvalScope" msgstr "4. 使用 EvalScope 运行模型推理压力测试" -#: ../../developer_guide/evaluation/using_evalscope.md:92 +#: ../../source/developer_guide/evaluation/using_evalscope.md:94 msgid "Install EvalScope[perf] using pip" msgstr "使用 pip 安装 EvalScope[perf]" -#: ../../developer_guide/evaluation/using_evalscope.md:98 +#: ../../source/developer_guide/evaluation/using_evalscope.md:100 msgid "Basic usage" msgstr "基本用法" -#: ../../developer_guide/evaluation/using_evalscope.md:100 -msgid "You can use `evalscope perf` run perf test:" +#: ../../source/developer_guide/evaluation/using_evalscope.md:102 +msgid "You can use `evalscope perf` to run perf testing:" msgstr "你可以使用 `evalscope perf` 运行性能测试:" -#: ../../developer_guide/evaluation/using_evalscope.md:112 +#: ../../source/developer_guide/evaluation/using_evalscope.md:115 msgid "Output results" msgstr "输出结果" -#: ../../developer_guide/evaluation/using_evalscope.md:173 +#: ../../source/developer_guide/evaluation/using_evalscope.md:176 msgid "" -"See more detail in: [EvalScope doc - Model Inference Stress " -"Testing](https://evalscope.readthedocs.io/en/latest/user_guides/stress_test/quick_start.html#basic-" -"usage)." +"See more detail in [EvalScope doc - Model Inference Stress " +"Testing](https://evalscope.readthedocs.io/en/latest/user_guides/stress_test/quick_start.html" +"#basic-usage)." msgstr "" -"更多详情见:[EvalScope 文档 - " -"模型推理压力测试](https://evalscope.readthedocs.io/en/latest/user_guides/stress_test/quick_start.html#basic-" -"usage)。" +"更多详情请参阅 [EvalScope 文档 - " +"模型推理压力测试](https://evalscope.readthedocs.io/en/latest/user_guides/stress_test/quick_start.html" +"#basic-usage)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po index 69c52cb0..a61fdb16 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po @@ -4,62 +4,115 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/evaluation/using_lm_eval.md:1 +#: ../../source/developer_guide/evaluation/using_lm_eval.md:1 msgid "Using lm-eval" msgstr "使用 lm-eval" -#: ../../developer_guide/evaluation/using_lm_eval.md:2 +#: ../../source/developer_guide/evaluation/using_lm_eval.md:3 +msgid "This document guides you to conduct accuracy testing using [lm-eval][1]." +msgstr "本文档指导您如何使用 [lm-eval][1] 进行准确率测试。" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:5 +msgid "Online Server" +msgstr "在线服务器" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:7 +msgid "1. Start the vLLM server" +msgstr "1. 启动 vLLM 服务器" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:9 +msgid "You can run docker container to start the vLLM server on a single NPU:" +msgstr "您可以在单个 NPU 上运行 Docker 容器来启动 vLLM 服务器:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:38 +msgid "The vLLM server is started successfully, if you see logs as below:" +msgstr "如果您看到如下日志,则表示 vLLM 服务器已成功启动:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:46 +#: ../../source/developer_guide/evaluation/using_lm_eval.md:175 +msgid "2. Run GSM8K using lm-eval for accuracy testing" +msgstr "2. 使用 lm-eval 运行 GSM8K 进行准确率测试" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:48 +msgid "You can query the result with input prompts:" +msgstr "您可以使用输入提示词查询结果:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:75 +msgid "The output format matches the following:" +msgstr "输出格式符合以下形式:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:105 +#: ../../source/developer_guide/evaluation/using_lm_eval.md:177 +msgid "Install lm-eval in the container:" +msgstr "在容器中安装 lm-eval:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:114 +#: ../../source/developer_guide/evaluation/using_lm_eval.md:186 msgid "" -"This document will guide you have a accuracy testing using [lm-" -"eval](https://github.com/EleutherAI/lm-evaluation-harness)." -msgstr "" -"本文将指导你如何使用 [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness) " -"进行准确率测试。" +"The Docker container is launched with `VLLM_USE_MODELSCOPE=True`, which " +"may cause lm-eval to download datasets from ModelScope instead of " +"HuggingFace. Setting `USE_MODELSCOPE_HUB=0` disables this behavior so " +"that lm-eval can fetch datasets from HuggingFace correctly." +msgstr "Docker 容器以 `VLLM_USE_MODELSCOPE=True` 启动,这可能导致 lm-eval 从 ModelScope 而非 HuggingFace 下载数据集。设置 `USE_MODELSCOPE_HUB=0` 可禁用此行为,使 lm-eval 能够正确从 HuggingFace 获取数据集。" -#: ../../developer_guide/evaluation/using_lm_eval.md:4 -msgid "1. Run docker container" -msgstr "1. 运行 docker 容器" - -#: ../../developer_guide/evaluation/using_lm_eval.md:6 -msgid "You can run docker container on a single NPU:" -msgstr "你可以在单个NPU上运行docker容器:" - -#: ../../developer_guide/evaluation/using_lm_eval.md:33 -msgid "2. Run ceval accuracy test using lm-eval" -msgstr "2. 
使用 lm-eval 运行 ceval 准确性测试" - -#: ../../developer_guide/evaluation/using_lm_eval.md:34 -msgid "Install lm-eval in the container." -msgstr "在容器中安装 lm-eval。" - -#: ../../developer_guide/evaluation/using_lm_eval.md:39 +#: ../../source/developer_guide/evaluation/using_lm_eval.md:120 +#: ../../source/developer_guide/evaluation/using_lm_eval.md:192 msgid "Run the following command:" msgstr "运行以下命令:" -#: ../../developer_guide/evaluation/using_lm_eval.md:50 -msgid "After 1-2 mins, the output is as shown below:" -msgstr "1-2 分钟后,输出如下所示:" +#: ../../source/developer_guide/evaluation/using_lm_eval.md:131 +msgid "After 30 minutes, the output is as shown below:" +msgstr "30 分钟后,输出如下所示:" -#: ../../developer_guide/evaluation/using_lm_eval.md:62 +#: ../../source/developer_guide/evaluation/using_lm_eval.md:143 +msgid "Offline Server" +msgstr "离线服务器" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:145 +msgid "1. Run docker container" +msgstr "1. 运行 docker 容器" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:147 +msgid "You can run docker container on a single NPU:" +msgstr "您可以在单个 NPU 上运行 docker 容器:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:203 +msgid "After 1 to 2 minutes, the output is shown below:" +msgstr "1 到 2 分钟后,输出如下所示:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:215 +msgid "Use Offline Datasets" +msgstr "使用离线数据集" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:217 msgid "" -"You can see more usage on [Lm-eval Docs](https://github.com/EleutherAI/lm-" -"evaluation-harness/blob/main/docs/README.md)." -msgstr "" -"你可以在 [Lm-eval 文档](https://github.com/EleutherAI/lm-evaluation-" -"harness/blob/main/docs/README.md) 上查看更多用法。" +"Take GSM8K (single dataset) and MMLU (multi-subject dataset) as examples," +" and you can see more from [using-local-datasets][2]." +msgstr "以 GSM8K(单数据集)和 MMLU(多学科数据集)为例,您可以在 [using-local-datasets][2] 中查看更多信息。" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:231 +msgid "Set [gsm8k.yaml][3] as follows:" +msgstr "按如下方式设置 [gsm8k.yaml][3]:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:294 +msgid "Set [_default_template_yaml][4] as follows:" +msgstr "按如下方式设置 [_default_template_yaml][4]:" + +#: ../../source/developer_guide/evaluation/using_lm_eval.md:317 +msgid "You can see more usage on [Lm-eval Docs][5]." +msgstr "您可以在 [Lm-eval 文档][5] 中查看更多用法。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po index 41c00d66..c445d12a 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po @@ -4,80 +4,79 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/evaluation/using_opencompass.md:1 +#: ../../source/developer_guide/evaluation/using_opencompass.md:1 msgid "Using OpenCompass" msgstr "使用 OpenCompass" -#: ../../developer_guide/evaluation/using_opencompass.md:2 +#: ../../source/developer_guide/evaluation/using_opencompass.md:3 msgid "" -"This document will guide you have a accuracy testing using " +"This document guides you to conduct accuracy testing using " "[OpenCompass](https://github.com/open-compass/opencompass)." msgstr "" "本文档将指导你如何使用 [OpenCompass](https://github.com/open-compass/opencompass) " "进行准确率测试。" -#: ../../developer_guide/evaluation/using_opencompass.md:4 -msgid "1. Online Serving" +#: ../../source/developer_guide/evaluation/using_opencompass.md:5 +msgid "1. Online Server" msgstr "1. 在线服务" -#: ../../developer_guide/evaluation/using_opencompass.md:6 -msgid "You can run docker container to start the vLLM server on a single NPU:" -msgstr "你可以运行 docker 容器,在单个 NPU 上启动 vLLM 服务器:" +#: ../../source/developer_guide/evaluation/using_opencompass.md:7 +msgid "You can run a docker container to start the vLLM server on a single NPU:" +msgstr "你可以运行一个 Docker 容器,在单个 NPU 上启动 vLLM 服务器:" -#: ../../developer_guide/evaluation/using_opencompass.md:32 -msgid "If your service start successfully, you can see the info shown below:" -msgstr "如果你的服务启动成功,你会看到如下所示的信息:" +#: ../../source/developer_guide/evaluation/using_opencompass.md:35 +msgid "The vLLM server is started successfully, if you see information as below:" +msgstr "如果看到如下信息,则表明 vLLM 服务器已成功启动:" -#: ../../developer_guide/evaluation/using_opencompass.md:39 +#: ../../source/developer_guide/evaluation/using_opencompass.md:43 msgid "" -"Once your server is started, you can query the model with input prompts in " -"new terminal:" -msgstr "一旦你的服务器启动后,你可以在新的终端中用输入提示词查询模型:" +"Once your server is started, you can query the model with input prompts " +"in a new terminal." +msgstr "服务器启动后,你可以在新的终端中使用输入提示词来查询模型。" -#: ../../developer_guide/evaluation/using_opencompass.md:51 -msgid "2. Run ceval accuracy test using OpenCompass" -msgstr "2. 使用 OpenCompass 运行 ceval 准确率测试" +#: ../../source/developer_guide/evaluation/using_opencompass.md:56 +msgid "2. Run C-Eval using OpenCompass for accuracy testing" +msgstr "2. 使用 OpenCompass 运行 C-Eval 进行准确率测试" -#: ../../developer_guide/evaluation/using_opencompass.md:52 +#: ../../source/developer_guide/evaluation/using_opencompass.md:58 msgid "" "Install OpenCompass and configure the environment variables in the " -"container." 
-msgstr "在容器中安装 OpenCompass 并配置环境变量。" +"container:" +msgstr "在容器中安装 OpenCompass 并配置环境变量:" -#: ../../developer_guide/evaluation/using_opencompass.md:64 +#: ../../source/developer_guide/evaluation/using_opencompass.md:70 msgid "" -"Add `opencompass/configs/eval_vllm_ascend_demo.py` with the following " -"content:" -msgstr "添加 `opencompass/configs/eval_vllm_ascend_demo.py`,内容如下:" +"Add the following content to " +"`opencompass/configs/eval_vllm_ascend_demo.py`:" +msgstr "将以下内容添加到 `opencompass/configs/eval_vllm_ascend_demo.py` 文件中:" -#: ../../developer_guide/evaluation/using_opencompass.md:104 +#: ../../source/developer_guide/evaluation/using_opencompass.md:110 msgid "Run the following command:" msgstr "运行以下命令:" -#: ../../developer_guide/evaluation/using_opencompass.md:110 -msgid "After 1-2 mins, the output is as shown below:" -msgstr "1-2 分钟后,输出如下所示:" +#: ../../source/developer_guide/evaluation/using_opencompass.md:116 +msgid "After 1 to 2 minutes, the output is shown below:" +msgstr "1 到 2 分钟后,输出结果如下所示:" -#: ../../developer_guide/evaluation/using_opencompass.md:120 +#: ../../source/developer_guide/evaluation/using_opencompass.md:126 msgid "" "You can see more usage on [OpenCompass " "Docs](https://opencompass.readthedocs.io/en/latest/index.html)." msgstr "" "你可以在 [OpenCompass " -"文档](https://opencompass.readthedocs.io/en/latest/index.html) 查看更多用法。" +"文档](https://opencompass.readthedocs.io/en/latest/index.html) 中查看更多用法。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po index cd0f32d6..5e519a76 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po @@ -6,183 +6,187 @@ # msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2025-11-21 10:19+0800\n" -"PO-Revision-Date: 2025-11-21 10:31\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: 2025-11-21 10:31+0000\n" "Last-Translator: Codex \n" -"Language-Team: Chinese (Simplified) \n" "Language: zh_CN\n" +"Language-Team: Chinese (Simplified) \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/performance_and_performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:1 msgid "MSProbe Debugging Guide" msgstr "MSProbe 调试指南" -#: ../../developer_guide/performance_and_performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:3 msgid "" -"During inference or training runs we often encounter accuracy anomalies such" -" as outputs drifting away from the expectation, unstable numerical behavior " -"(NaN/Inf), or predictions that no longer match the labels. To pinpoint the " -"root cause we have to monitor and capture intermediate data produced while " -"the model executes—feature maps, weights, activations, and layer outputs. 
By" -" capturing key tensors at specific stages, logging I/O pairs for the core " -"layers, and retaining contextual metadata (prompts, tensor dtypes, hardware " -"configuration, etc.), we can systematically trace where the accuracy " -"degradation or numerical error started. This guide describes the end-to-end " -"workflow for diagnosing accuracy issues for AI models (with a focus on vllm-" -"ascend services): preparation, data capture, and analysis & verification." +"During inference or training runs we often encounter accuracy anomalies " +"such as outputs drifting away from the expectation, unstable numerical " +"behavior (NaN/Inf), or predictions that no longer match the labels. To " +"pinpoint the root cause we have to monitor and capture intermediate data " +"produced while the model executes—feature maps, weights, activations, and" +" layer outputs. By capturing key tensors at specific stages, logging I/O " +"pairs for the core layers, and retaining contextual metadata (prompts, " +"tensor dtypes, hardware configuration, etc.), we can systematically trace" +" where the accuracy degradation or numerical error started. This guide " +"describes the end-to-end workflow for diagnosing accuracy issues for AI " +"models (with a focus on vllm-ascend services): preparation, data capture," +" and analysis & verification." msgstr "" "在推理或训练过程中,我们经常会遇到输出偏离预期、出现 NaN/Inf " "等数值不稳定现象,或者模型预测与标签不一致等精度异常。要定位根因,就必须监控并采集模型执行过程中的中间数据——例如特征图、权重、激活值及各层输出。通过在关键阶段捕获核心张量、记录核心层的输入输出对,并保留提示词、张量" " dtype、硬件配置等上下文元数据,我们可以系统追踪精度退化或数值错误的源头。本指南聚焦 vllm-ascend 服务,介绍 AI " "模型精度问题排查的完整流程:准备、数据采集以及分析与验证。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:5 msgid "0. Background Concepts" msgstr "0. 前置概念" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:7 msgid "`msprobe` supports three accuracy levels:" msgstr "`msprobe` 支持三种精度级别:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:9 msgid "" -"**L0**: dumps tensors at the module level and generates `construct.json` so " -"that visualization tools can rebuild the network structure. A model or " -"submodule handle must be passed in." -msgstr "**L0**:在`nn.Module`级别保存`tensor`,并生成 `construct.json` 以便可视化工具还原网络结构,需要传入模型或子模块句柄。" +"**L0**: dumps tensors at the module level and generates `construct.json` " +"so that visualization tools can rebuild the network structure. A model or" +" submodule handle must be passed in." +msgstr "" +"**L0**:在`nn.Module`级别保存`tensor`,并生成 `construct.json` " +"以便可视化工具还原网络结构,需要传入模型或子模块句柄。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:10 msgid "" "**L1**: collects operator-level statistics only, which is suitable for " "lightweight troubleshooting." msgstr "**L1**:仅采集算子级统计信息,适合轻量排查。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:11 msgid "" -"**mix**: captures both structural information and operator statistics, which" -" is useful when you need both graph reconstruction and numerical " +"**mix**: captures both structural information and operator statistics, " +"which is useful when you need both graph reconstruction and numerical " "comparisons." 
msgstr "**mix**:同时获取结构信息与算子统计,适用于既要构图又要进行数值对比的场景。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:13 msgid "1. Prerequisites" msgstr "1. 前提条件" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:15 msgid "1.1 Install `msprobe`" msgstr "1.1 安装 `msprobe`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:17 msgid "Install msprobe with pip:" msgstr "使用 pip 安装 msprobe:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:23 msgid "1.2 Visualization dependencies (optional)" msgstr "1.2 可视化依赖(可选)" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:25 msgid "" -"Install additional dependencies if you need to visualize the captured data." +"Install additional dependencies if you need to visualize the captured " +"data." msgstr "如需对采集的数据进行可视化,请安装以下依赖。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:27 msgid "Install `tb_graph_ascend`:" msgstr "安装 `tb_graph_ascend`:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:33 msgid "2. Collecting Data with `msprobe`" msgstr "2. 使用 `msprobe` 采集数据" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:35 msgid "" -"We generally follow a coarse-to-fine strategy when capturing data. First " -"identify the token where the issue shows up, and then decide which range " -"needs to be sampled around that token. The typical workflow is described " -"below." +"We generally follow a coarse-to-fine strategy when capturing data. First," +" identify the token where the issue shows up, and then decide which range" +" needs to be sampled around that token. The typical workflow is described" +" below." msgstr "采集通常遵循由粗到细的策略:先确定问题出现的 token,再围绕该 token 决定采样范围,常规流程如下。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:37 msgid "2.1 Prepare the dump configuration file" msgstr "2.1 准备 dump 配置文件" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:39 msgid "" -"Create a `config.json` that can be parsed by `PrecisionDebugger` and place " -"it in an accessible path. Common fields are:" +"Create a `config.json` that can be parsed by `PrecisionDebugger` and " +"place it in an accessible path. 
Common fields are:" msgstr "创建可被 `PrecisionDebugger` 解析的 `config.json` 并放置在可访问路径,常见字段如下:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "Field" msgstr "字段" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "Description" msgstr "说明" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "Required" msgstr "必填" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "`task`" msgstr "`task`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "" "Type of dump task. Common PyTorch values include `\"statistics\"` and " -"`\"tensor\"`. A statistics task collects tensor statistics (mean, variance, " -"max, min, etc.) while a tensor task captures arbitrary tensors." +"`\"tensor\"`. A statistics task collects tensor statistics (mean, " +"variance, max, min, etc.) while a tensor task captures arbitrary tensors." msgstr "" "dump 任务类型。PyTorch 常见取值包括 `\"statistics\"` 和 `\"tensor\"`:statistics " "任务采集张量统计量(均值、方差、最大值、最小值等),tensor 任务可采集任意张量。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "Yes" msgstr "是" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "`dump_path`" msgstr "`dump_path`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "" -"Directory where dump results are stored. When omitted, `msprobe` uses its " -"default path." +"Directory where dump results are stored. When omitted, `msprobe` uses its" +" default path." msgstr "dump 结果保存目录,未配置时使用 `msprobe` 默认路径。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "No" msgstr "否" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "`rank`" msgstr "`rank`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "" -"Ranks to sample. An empty list collects every rank. For single-card tasks " -"you must set this field to `[]`." +"Ranks to sample. An empty list collects every rank. For single-card " +"tasks, you must set this field to `[]`." msgstr "指定需要采集的设备 rank,空列表表示全部 rank;单卡任务必须配置为 `[]`。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "`step`" msgstr "`step`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "Token iteration(s) to sample. An empty list means every iteration." 
msgstr "指定采集的 token 轮次,空列表表示全部迭代。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "`level`" msgstr "`level`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "" "Dump level string (`\"L0\"`, `\"L1\"`, or `\"mix\"`). `L0` targets " "`nn.Module`, `L1` targets `torch.api`, and `mix` collects both." @@ -190,372 +194,354 @@ msgstr "" "dump 级别字符串(`\"L0\"`、`\"L1\"`、`\"mix\"`),L0 面向 `nn.Module`,L1 面向 " "`torch.api`,mix 同时采集两者。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "`async_dump`" msgstr "`async_dump`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "" "Whether to enable asynchronous dump (supported for PyTorch " "`statistics`/`tensor` tasks). Defaults to `false`." msgstr "是否启用异步 dump(PyTorch `statistics`/`tensor` 任务可用),默认 `false`。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "`scope`" msgstr "`scope`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "Module range to sample. An empty list collects every module." msgstr "指定需要采集的模块范围,空列表表示全部模块。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "`list`" msgstr "`list`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md msgid "Operator range to sample. An empty list collects every operator." msgstr "指定需要采集的算子范围,空列表表示全部算子。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "" -"To restrict the operators that are captured, configure the `list` block:" +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:52 +msgid "To restrict the operators that are captured, configure the `list` block:" msgstr "如需进一步限定算子范围,请配置 `list`:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:54 msgid "" -"`scope` (list[str]): In PyTorch pynative scenarios this field restricts the " -"dump range. Provide two module or API names that follow the tool's naming " -"convention to lock a range; only data between the two names will be dumped. " -"Examples:" +"`scope` (list[str]): In PyTorch PyNative scenarios this field restricts " +"the dump range. Provide two module or API names that follow the tool's " +"naming convention to lock a range; only data between the two names will " +"be dumped. Examples:" msgstr "" -"`scope`(list[str]):在 PyTorch 动态图场景下用于限定 dump 区间。按照工具命名格式提供两个模块或 API 名称,只会 " -"dump 这一区间内的数据。示例:" +"`scope`(list[str]):在 PyTorch 动态图场景下用于限定 dump 区间。按照工具命名格式提供两个模块或 API 名称,只会" +" dump 这一区间内的数据。示例:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:62 msgid "" -"The `level` setting determines what can be provided—modules when `level=L0`," -" APIs when `level=L1`, and either modules or APIs when `level=mix`." 
-msgstr "" -"`level` 的取值决定可配置内容:`level=L0` 填模块名,`level=L1` 填 API 名,`level=mix` 则二者皆可。" +"The `level` setting determines what can be provided—modules when " +"`level=L0`, APIs when `level=L1`, and either modules or APIs when " +"`level=mix`." +msgstr "`level` 的取值决定可配置内容:`level=L0` 填模块名,`level=L1` 填 API 名,`level=mix` 则二者皆可。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:64 msgid "`list` (list[str]): Custom operator list. Options include:" msgstr "`list`(list[str]):用于自定义采集的算子范围,常见方式包括:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:65 msgid "" -"Supply the full names of specific APIs in PyTorch pynative scenarios to only" -" dump those APIs. Example: `\"list\": [\"Tensor.permute.1.forward\", " -"\"Tensor.transpose.2.forward\", \"Torch.relu.3.backward\"]`." +"Supply the full names of specific APIs in PyTorch pynative scenarios to " +"only dump those APIs. Example: `\"list\": [\"Tensor.permute.1.forward\", " +"\"Tensor.transpose.2.forward\", \"Torch.relu.3.forward\"]`." msgstr "" "在 PyTorch 动态图场景中配置 API 全称,仅 dump 这些 API,例如 `\"list\": " "[\"Tensor.permute.1.forward\", \"Tensor.transpose.2.forward\", " -"\"Torch.relu.3.backward\"]`。" +"\"Torch.relu.3.forward\"]`。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:66 msgid "" -"When `level=mix`, you can provide module names so that the dump expands to " -"everything produced while the module is running. Example: `\"list\": " +"When `level=mix`, you can provide module names so that the dump expands " +"to everything produced while the module is running. Example: `\"list\": " "[\"Module.module.language_model.encoder.layers.0.mlp.ParallelMlp.forward.0\"]`." msgstr "" "当 `level=mix` 时可以填写模块名称,工具会在该模块执行期间展开并 dump 所有数据,例如 `\"list\": " "[\"Module.module.language_model.encoder.layers.0.mlp.ParallelMlp.forward.0\"]`。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:67 msgid "" -"Provide a substring such as `\"list\": [\"relu\"]` to dump every API whose " -"name contains the substring. When `level=mix`, modules whose names contain " -"the substring are also expanded." +"Provide a substring such as `\"list\": [\"relu\"]` to dump every API " +"whose name contains the substring. When `level=mix`, modules whose names " +"contain the substring are also expanded." msgstr "" "也可以仅提供子串(如 `\"list\": [\"relu\"]`),会 dump 名称包含该字符串的 API,且 `level=mix` " "时会展开名称包含该字符串的模块。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:69 msgid "Example configuration:" msgstr "示例配置:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "2. Enable `msprobe` in vllm-ascend" -msgstr "2. 在 vllm-ascend 中启用 `msprobe`" +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:90 +msgid "3. Enable `msprobe` in vllm-ascend" +msgstr "3. 
在 vllm-ascend 中启用 `msprobe`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:92 msgid "" -"Start vLLM in eager mode by adding `--enforce-eager` (static-graph scenarios" -" are not supported yet) and pass the config path through `--additional-" -"config`:" +"Start vLLM in eager mode by adding `--enforce-eager` (static-graph " +"scenarios are not supported yet) and pass the config path through " +"`--additional-config`:" msgstr "" -"通过添加 `--enforce-eager` 以 eager 模式启动 vLLM(静态图暂不支持),并通过 `--additional-config` " -"传入配置路径:" +"通过添加 `--enforce-eager` 以 eager 模式启动 vLLM(静态图暂不支持),并通过 `--additional-" +"config` 传入配置路径:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "3. Send requests and collect dumps" -msgstr "3. 发送请求并采集 dump" +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:103 +msgid "4. Send requests and collect dumps" +msgstr "4. 发送请求并采集 dump" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:105 msgid "Send inference requests as usual, for example:" msgstr "按常规方式发送推理请求,例如:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:118 msgid "" -"Each request drives the sequence `msprobe: start -> forward/backward -> stop" -" -> step`. The runner invokes `step()` on every code path, so you always get" -" a complete dataset even if inference returns early." +"Each request drives the sequence `msprobe: start -> forward -> stop -> " +"step`. The runner invokes `step()` on every code path, so you always get " +"a complete dataset even if inference returns early." msgstr "" -"每个请求都会执行 `msprobe: start -> forward/backward -> stop -> step`,Runner " +"每个请求都会执行 `msprobe: start -> forward -> stop -> step`,Runner " "在所有路径都会调用 `step()`,即使推理提前结束也能拿到完整数据。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:120 msgid "Dump files are written into `dump_path`. They usually contain:" msgstr "dump 文件写入 `dump_path`,通常包含:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:121 msgid "Tensor files grouped by operator/module." msgstr "按算子或模块划分的张量文件。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:122 msgid "" "`dump.json`, which records metadata such as dtype, shape, min/max, and " "`requires_grad`." msgstr "描述 dtype、shape、最小/最大值以及 `requires_grad` 等信息的 `dump.json`。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:123 msgid "" -"`construct.json`, which is generated when `level` is `L0` or `mix` (required" -" for visualization)." +"`construct.json`, which is generated when `level` is `L0` or `mix` " +"(required for visualization)." msgstr "当级别为 `L0` 或 `mix` 时生成的 `construct.json`(可视化必需)。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:125 msgid "Example directory layout:" msgstr "目录结构示例:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -#, python-brace-format +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:156 msgid "" -"`rank`: Device ID. 
Each card writes its data to the corresponding `rank{ID}`" -" directory. In non-distributed scenarios the directory is simply named " -"`rank`." +"`rank`: Device ID. Each card writes its data to the corresponding " +"`rank{ID}` directory. In non-distributed scenarios the directory is " +"simply named `rank`." msgstr "`rank`:设备 ID。每张卡写入对应的 `rank{ID}` 目录,非分布式场景目录名称为 `rank`。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:157 msgid "`dump_tensor_data`: Tensor payloads that were collected." msgstr "`dump_tensor_data`:采集到的张量数据。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:158 msgid "" -"`dump.json`: Statistics for the forward/backward data of each API or module," -" including names, dtype, shape, max, min, mean, L2 norm (square root of the " -"L2 variance), and CRC-32 when `summary_mode=\"md5\"`. See [dump.json file " -"description](#dumpjson-file-description) for details." +"`dump.json`: Statistics for the forward data of each API or module, " +"including names, dtype, shape, max, min, mean, L2 norm (square root of " +"the L2 variance), and CRC-32 when `summary_mode=\"md5\"`. See [dump.json " +"file description](#dumpjson-file-description) for details." msgstr "" -"`dump.json`:保存各 API 或模块前/反向数据统计,包含名称、dtype、shape、max、min、mean、L2 " -"norm(平方根)以及在 `summary_mode=\"md5\"` 下的 CRC-32。详见 [dump.json file " -"description](#dumpjson-file-description)。" +"`dump.json`:各 API 或模块前向数据的统计信息,包括名称、dtype、shape、最大值、最小值、平均值、L2 范数(L2 方差的平方根),以及在 `summary_mode=\"md5\"` 时的 CRC-32 值。详见 [dump.json 文件说明](#dumpjson-file-description)。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:159 msgid "" -"`dump_error_info.log`: Present only when the dump tool encountered an error " -"and records the failure log." -msgstr "`dump_error_info.log`:仅在 dump 工具报错时生成,记录错误日志。" +"`dump_error_info.log`: Present only when the dump tool encountered an " +"error and records the failure log." +msgstr "`dump_error_info.log`:仅在 dump 工具遇到错误时生成,记录失败日志。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:160 msgid "`stack.json`: Call stacks for APIs/modules." -msgstr "`stack.json`:API/Module 的调用栈信息。" +msgstr "`stack.json`:API/模块的调用栈信息。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:161 msgid "" -"`construct.json`: Hierarchical structure description. Empty when `level=L1`." -msgstr "`construct.json`:分层结构描述,`level=L1` 时为空。" +"`construct.json`: Hierarchical structure description. Empty when " +"`level=L1`." +msgstr "`construct.json`:分层结构描述,当 `level=L1` 时为空。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "4. Analyze the results" -msgstr "4. 分析结果" +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:163 +msgid "5. Analyze the results" +msgstr "5. 
分析结果" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "4.1 Prerequisites" -msgstr "4.1 前置条件" +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:165 +msgid "5.1 Prerequisites" +msgstr "5.1 前置条件" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:167 msgid "" -"You typically need two dump datasets: one from the \"problem side\" (the run" -" that exposes the accuracy or numerical error) and another from the " +"You typically need two dump datasets: one from the \"problem side\" (the " +"run that exposes the accuracy or numerical error) and another from the " "\"benchmark side\" (a good baseline). These datasets do not have to be " -"identical—they can come from different branches, framework versions, or even" -" alternative implementations (operator substitutions, different graph-" -"optimization switches, etc.). As long as they use the same or similar " -"inputs, hardware topology, and sampling points (step/token), `msprobe` can " -"compare them and locate the divergent nodes. If you cannot find a perfectly " -"clean benchmark, start by capturing the problem-side data, craft the " -"smallest reproducible case by hand, and perform a self-comparison. Below we " -"assume the problem dump is `problem_dump` and the benchmark dump is " -"`bench_dump`." +"identical—they can come from different branches, framework versions, or " +"even alternative implementations (operator substitutions, different " +"graph-optimization switches, etc.). As long as they use the same or " +"similar inputs, hardware topology, and sampling points (step/token), " +"`msprobe` can compare them and locate the divergent nodes. If you cannot " +"find a perfectly clean benchmark, start by capturing the problem-side " +"data, craft the smallest reproducible case by hand, and perform a self-" +"comparison. Below we assume the problem dump is `problem_dump` and the " +"benchmark dump is `bench_dump`." msgstr "" -"通常需要准备两份 dump " -"数据:一份来自出现精度或数值异常的“问题侧”,另一份来自表现正常的“标杆侧”。两份数据无需完全一致,可以来自不同分支、不同框架版本,甚至不同实现(算子替换、图优化开关差异等)。只要输入、硬件拓扑和采样点(step/token)保持一致或相近,msprobe" -" 就能对比并定位差异节点。若无法找到足够干净的标杆,可先采集问题侧数据,手动构造最小复现用例并进行自对比。下文默认问题侧目录为 " -"`problem_dump`,标杆侧为 `bench_dump`。" +"通常需要两份 dump 数据集:一份来自“问题侧”(暴露精度或数值错误的运行),另一份来自“标杆侧”(良好的基线)。这些数据集不必完全相同——它们可以来自不同的分支、框架版本,甚至是替代实现(算子替换、不同的图优化开关等)。只要它们使用相同或相似的输入、硬件拓扑和采样点(step/token),`msprobe` 就可以比较它们并定位差异节点。如果找不到完全干净的标杆,可以先捕获问题侧数据,手动构建最小的可复现案例,并进行自比较。下文假设问题侧 dump 为 `problem_dump`,标杆侧 dump 为 `bench_dump`。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "4.2 Visualization" -msgstr "4.2 可视化" +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:169 +msgid "5.2 Visualization" +msgstr "5.2 可视化" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:171 msgid "" -"Use `msprobe graph_visualize` to generate results that can be opened inside " -"`tb_graph_ascend`." -msgstr "使用 `msprobe graph_visualize` 生成结果,并在 `tb_graph_ascend` 中查看。" +"Use `msprobe -f pytorch graph` to generate results that can be opened " +"inside `tb_graph_ascend`." +msgstr "使用 `msprobe -f pytorch graph` 生成结果,可在 `tb_graph_ascend` 中打开。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:173 msgid "" -"Ensure the dump contains `construct.json` (i.e., `level = L0` or `level = " -"mix`)." 
-msgstr "确保 dump 中包含 `construct.json`(即 `level=L0` 或 `level=mix`)。" +"Ensure the dump contains `construct.json` (i.e., `level = L0` or `level =" +" mix`)." +msgstr "确保 dump 包含 `construct.json`(即 `level = L0` 或 `level = mix`)。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:174 msgid "" -"Prepare a comparison file such as `compare.json`. Its format and generation " -"flow are described in section 3.1.3 of `msprobe_visualization.md`. Example " -"(minimal runnable snippet):" -msgstr "" -"准备 `compare.json` 等对比文件,其格式与生成方式见 `msprobe_visualization.md` 3.1.3 节。示例:" +"Prepare a comparison file such as `compare.json`. Its format and " +"generation flow are described in section 3.1.3 of " +"`msprobe_visualization.md`. Example (minimal runnable snippet):" +msgstr "准备一个比较文件,例如 `compare.json`。其格式和生成流程在 `msprobe_visualization.md` 的 3.1.3 节中描述。示例(最小可运行片段):" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:184 msgid "" -"Replace the paths with your dump directories before invoking `msprobe " -"graph_visualize`. **If you only need to build a single graph**, omit " +"Replace the paths with your dump directories before invoking `msprobe -f " +"pytorch graph`. **If you only need to build a single graph**, omit " "`bench_path` to visualize one dump. Multi-rank scenarios (single rank, " "multi-rank, or multi-step multi-rank) are also supported. `npu_path` or " -"`bench_path` must contain folders named `rank+number`, and every rank folder" -" must contain a non-empty `construct.json` together with `dump.json` and " -"`stack.json`. If any `construct.json` is empty, verify that the dump level " -"includes `L0` or `mix`. When comparing graphs, both `npu_path` and " -"`bench_path` must contain the same set of rank folders so they can be paired" -" one-to-one." +"`bench_path` must contain folders named `rank+number`, and every rank " +"folder must contain a non-empty `construct.json` together with " +"`dump.json` and `stack.json`. If any `construct.json` is empty, verify " +"that the dump level includes `L0` or `mix`. When comparing graphs, both " +"`npu_path` and `bench_path` must contain the same set of rank folders so " +"they can be paired one-to-one." msgstr "" -"在执行 `msprobe graph_visualize` 前,将路径替换为实际 dump 目录。**若只需构建单图**,可省略 " -"`bench_path`。单 rank、多 rank 以及多 step 多 rank 场景均受支持:`npu_path` 或 `bench_path` " -"下必须只有名为 `rank+数字` 的文件夹,并且每个 rank 目录都包含非空的 `construct.json`、`dump.json` 与 " -"`stack.json`。若某个 `construct.json` 为空,请确认 dump 级别包含 L0 或 mix。做图比较时,两侧的 rank " -"目录数量和名称必须一一对应。" +"在调用 `msprobe -f pytorch graph` 之前,将路径替换为你的 dump 目录。**如果只需要构建单个图**,省略 `bench_path` 以可视化一个 dump。多 rank 场景(单 rank、多 rank 或多 step 多 rank)也受支持。`npu_path` 或 `bench_path` 必须包含名为 `rank+数字` 的文件夹,并且每个 rank 文件夹必须包含一个非空的 `construct.json` 以及 `dump.json` 和 `stack.json`。如果任何 `construct.json` 为空,请验证 dump 级别是否包含 `L0` 或 `mix`。比较图时,`npu_path` 和 `bench_path` 必须包含相同的 rank 文件夹集合,以便它们可以一一配对。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:209 msgid "Run:" -msgstr "执行:" +msgstr "运行:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:217 msgid "" "After the comparison finishes, a `*.vis.db` file is created under " "`graph_output`." 
-msgstr "对比完成后会在 `graph_output` 下生成 `*.vis.db` 文件。" +msgstr "比较完成后,会在 `graph_output` 下创建一个 `*.vis.db` 文件。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:219 #, python-brace-format msgid "Graph build: `build_{timestamp}.vis.db`" msgstr "图构建:`build_{timestamp}.vis.db`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:220 #, python-brace-format msgid "Graph comparison: `compare_{timestamp}.vis.db`" -msgstr "图对比:`compare_{timestamp}.vis.db`" +msgstr "图比较:`compare_{timestamp}.vis.db`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:222 msgid "" "Launch `tensorboard` and load the output directory to inspect structural " -"differences, numerical comparisons, overflow detection results, cross-device" -" communication nodes, and filters/search. Pass the directory containing the " -"`.vis.db` files to `--logdir`:" +"differences, numerical comparisons, overflow detection results, cross-" +"device communication nodes, and filters/search. Pass the directory " +"containing the `.vis.db` files to `--logdir`:" msgstr "" -"启动 `tensorboard` 并加载输出目录,可查看结构差异、精度对比、溢出检测、跨卡通信节点以及多级目录搜索/筛选。将包含 `.vis.db` " -"的目录传给 `--logdir`:" +"启动 `tensorboard` 并加载输出目录,以检查结构差异、数值比较、溢出检测结果、跨设备通信节点以及过滤器/搜索。将包含 `.vis.db` 文件的目录传递给 `--logdir`:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:228 msgid "" "Inspect the visualization. The UI usually displays the overall model " "structure with operators, parameters, and tensor I/O. Click any node to " "expand its children." -msgstr "在可视化界面中可查看模型整体结构(算子、参数、张量 I/O),点击节点可展开其子结构。" +msgstr "检查可视化界面。UI 通常显示包含算子、参数和张量 I/O 的整体模型结构。点击任何节点以展开其子节点。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:229 msgid "" -"**Difference visualization**: Comparison results highlight divergent nodes " -"with different colors (the larger the difference, the redder the node). " -"Click a node to view its detailed information including tensor " -"inputs/outputs, parameters, and operator type. Analyze the data difference " -"and the surrounding connections to pinpoint the exact divergence." +"**Difference visualization**: Comparison results highlight divergent " +"nodes with different colors (the larger the difference, the redder the " +"node). Click a node to view its detailed information including tensor " +"inputs/outputs, parameters, and operator type. Analyze the data " +"difference and the surrounding connections to pinpoint the exact " +"divergence." msgstr "" -"**差异可视化**:对比结果会使用不同颜色突出显示差异节点(差异越大颜色越红)。点击节点可查看输入输出张量、参数以及算子类型,据此结合上下游关系定位具体差异点。" +"**差异可视化**:比较结果用不同颜色突出显示差异节点(差异越大,节点越红)。点击节点可查看其详细信息,包括张量输入/输出、参数和算子类型。分析数据差异和周围连接,以精确定位确切的差异点。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:230 msgid "**Helper features**:" msgstr "**辅助功能**:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:231 msgid "" "Switch rank/step: Quickly check difference nodes on different ranks and " "steps." 
-msgstr "切换 rank/step:快速查看不同 rank 和 step 下的差异节点。" +msgstr "切换 rank/step:快速检查不同 rank 和 step 上的差异节点。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:232 +msgid "Search/filter: Use the search box to filter nodes by operator name, etc." +msgstr "搜索/过滤:使用搜索框按算子名称等过滤节点。" + +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:233 msgid "" -"Search/filter: Use the search box to filter nodes by operator name, etc." -msgstr "搜索/筛选:可根据算子名称等快速过滤节点。" +"Manual mapping: Automatic mapping cannot cover every case, so the tool " +"lets you manually map nodes between the problem and benchmark graphs " +"before generating comparison results." +msgstr "手动映射:自动映射无法覆盖所有情况,因此该工具允许你在生成比较结果之前,手动映射问题图和标杆图之间的节点。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "" -"Manual mapping: Automatic mapping cannot cover every case, so the tool lets " -"you manually map nodes between the problem and benchmark graphs before " -"generating comparison results." -msgstr "手动映射:当自动映射无法覆盖所有情况时,可手动匹配问题侧与标杆侧节点后再生成对比结果。" +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:235 +msgid "6. Troubleshooting" +msgstr "6. 故障排除" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "5. Troubleshooting" -msgstr "5. 故障排查" - -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:237 msgid "" "`RuntimeError: Please enforce eager mode`: Restart vLLM and add the " "`--enforce-eager` flag." -msgstr "" -"`RuntimeError: Please enforce eager mode`:重启 vLLM 并加上 `--enforce-eager` 参数。" +msgstr "`RuntimeError: Please enforce eager mode`:重启 vLLM 并添加 `--enforce-eager` 标志。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:238 msgid "" "No dump files: Confirm that the JSON path is correct and every node has " "write permission. In distributed scenarios set `keep_all_ranks` so that " "every rank writes its own dump." -msgstr "" -"缺少 dump 文件:检查 JSON 路径是否正确、各节点是否具有写权限;分布式场景可启用 `keep_all_ranks` 让每个 rank " -"单独写入。" +msgstr "没有 dump 文件:确认 JSON 路径正确且每个节点都有写权限。在分布式场景中,设置 `keep_all_ranks` 以便每个 rank 写入自己的 dump。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:239 msgid "" "Dumps are too large: Start with a `statistics` task to locate abnormal " -"tensors, then narrow the scope with `scope`/`list`/`tensor_list`, `filters`," -" `token_range`, etc." -msgstr "" -"dump 体积过大:建议先运行 `statistics` 任务定位异常张量,再通过 " -"`scope`/`list`/`tensor_list`、`filters`、`token_range` 等方式缩小范围。" +"tensors, then narrow the scope with `scope`/`list`/`tensor_list`, " +"`filters`, `token_range`, etc." 
+msgstr "Dump 文件过大:从 `statistics` 任务开始,定位异常张量,然后使用 `scope`/`list`/`tensor_list`、`filters`、`token_range` 等缩小范围。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:243 msgid "Appendix" msgstr "附录" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:245 msgid "dump.json file description" msgstr "dump.json 文件说明" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:247 msgid "L0 level" msgstr "L0 级别" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:249 msgid "" -"An L0 `dump.json` contains forward/backward I/O for modules together with " -"parameters and parameter gradients. Using PyTorch's `Conv2d` as an example, " -"the network code looks like:" -msgstr "" -"L0 级别的 `dump.json` 包含模块的前/反向输入输出以及参数与参数梯度。以下以 PyTorch 的 `Conv2d` 为例,网络代码如下:" +"An L0 `dump.json` contains forward I/O for modules together with " +"parameters. Using PyTorch's `Conv2d` as an example, the network code " +"looks like:" +msgstr "L0 级别的 `dump.json` 包含模块的前向 I/O 以及参数。以 PyTorch 的 `Conv2d` 为例,网络代码如下:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:251 msgid "" "`output = self.conv2(input) # self.conv2 = torch.nn.Conv2d(64, 128, 5, " "padding=2, bias=True)`" @@ -563,36 +549,19 @@ msgstr "" "`output = self.conv2(input) # self.conv2 = torch.nn.Conv2d(64, 128, 5, " "padding=2, bias=True)`" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:253 msgid "`dump.json` contains the following entries:" msgstr "`dump.json` 包含以下条目:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:255 msgid "" -"`Module.conv2.Conv2d.forward.0`: Forward data of the module. `input_args` " -"represents positional inputs, `input_kwargs` represents keyword inputs, " +"`Module.conv2.Conv2d.forward.0`: Forward data of the module. `input_args`" +" represents positional inputs, `input_kwargs` represents keyword inputs, " "`output` stores forward outputs, and `parameters` stores weights/biases." msgstr "" -"`Module.conv2.Conv2d.forward.0`:模块的前向数据,`input_args` 为位置参数,`input_kwargs` " -"为关键字参数,`output` 存放前向输出,`parameters` 存放权重和偏置。" +"`Module.conv2.Conv2d.forward.0`:模块的前向数据。`input_args` 表示位置输入,`input_kwargs` 表示关键字输入,`output` 存储前向输出,`parameters` 存储权重/偏置。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "" -"`Module.conv2.Conv2d.parameters_grad`: Parameter gradients (weight and " -"bias)." -msgstr "`Module.conv2.Conv2d.parameters_grad`:模块参数的梯度(weight 与 bias)。" - -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "" -"`Module.conv2.Conv2d.backward.0`: Backward data of the module. `input` " -"represents gradients that flow into the module (gradients of the forward " -"outputs) and `output` represents gradients that flow out (gradients of the " -"module inputs)." 
-msgstr "" -"`Module.conv2.Conv2d.backward.0`:模块的反向数据,`input` 表示流入模块的梯度(对应前向输出),`output` " -"表示流出的梯度(对应模块输入)。" - -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:257 #, python-brace-format msgid "" "**Note**: When the `model` parameter passed to the dump API is " @@ -600,47 +569,32 @@ msgid "" "include the index inside the list (`{Module}.{index}.*`). Example: " "`Module.0.conv1.Conv2d.forward.0`." msgstr "" -"**说明**:当 dump API 的 `model` 参数为 `List[torch.nn.Module]` 或 " -"`Tuple[torch.nn.Module]` 时,模块级名称会包含其在列表中的索引(`{Module}.{index}.*`),例如 " -"`Module.0.conv1.Conv2d.forward.0`。" +"**注意**:当传递给 dump API 的 `model` 参数是 `List[torch.nn.Module]` 或 `Tuple[torch.nn.Module]` 时,模块级名称包含列表内的索引(`{Module}.{index}.*`)。例如:`Module.0.conv1.Conv2d.forward.0`。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:341 msgid "L1 level" msgstr "L1 级别" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:343 msgid "" -"An L1 `dump.json` records forward/backward I/O for APIs. Using PyTorch's " -"`relu` function as an example (`output = torch.nn.functional.relu(input)`), " -"the file contains:" -msgstr "" -"L1 级别的 `dump.json` 记录 API 的前/反向输入输出。以下以 PyTorch 的 `relu` 函数(`output = " -"torch.nn.functional.relu(input)`)为例:" +"An L1 `dump.json` records forward I/O for APIs. Using PyTorch's `relu` " +"function as an example (`output = torch.nn.functional.relu(input)`), the " +"file contains:" +msgstr "L1 级别的 `dump.json` 记录 API 的前向 I/O。以 PyTorch 的 `relu` 函数为例(`output = torch.nn.functional.relu(input)`),该文件包含:" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:345 msgid "" "`Functional.relu.0.forward`: Forward data of the API. `input_args` are " -"positional inputs, `input_kwargs` are keyword inputs, and `output` stores " -"the forward outputs." -msgstr "" -"`Functional.relu.0.forward`:API 的前向数据,`input_args` 为位置输入,`input_kwargs` " -"为关键字输入,`output` 存放前向输出。" +"positional inputs, `input_kwargs` are keyword inputs, and `output` stores" +" the forward outputs." +msgstr "`Functional.relu.0.forward`:API 的前向数据。`input_args` 是位置输入,`input_kwargs` 是关键字输入,`output` 存储前向输出。" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md -msgid "" -"`Functional.relu.0.backward`: Backward data of the API. `input` represents " -"the gradients of the forward outputs, and `output` represents the gradients " -"that flow back to the forward inputs." -msgstr "" -"`Functional.relu.0.backward`:API 的反向数据,`input` 表示前向输出的梯度,`output` " -"表示回传到前向输入的梯度。" - -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:398 msgid "mix level" msgstr "mix 级别" -#: ../../developer_guide/performance_and_debug/msprobe_guide.md +#: ../../source/developer_guide/performance_and_debug/msprobe_guide.md:400 msgid "" -"A `mix` dump.json contains both L0 and L1 level data; the file format is the" -" same as the examples above." -msgstr "`mix` 级别的 dump.json 同时包含 L0 与 L1 数据,文件格式与上述示例相同。" +"A `mix` dump.json contains both L0 and L1 level data; the file format is " +"the same as the examples above." 
+msgstr "`mix` 级别的 dump.json 包含 L0 和 L1 级别的数据;文件格式与上述示例相同。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po new file mode 100644 index 00000000..fad8d35d --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po @@ -0,0 +1,348 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:1 +msgid "Optimization and Tuning" +msgstr "优化与调优" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:3 +msgid "" +"This guide aims to help users improve vLLM-Ascend performance at the " +"system level. It includes OS configuration, library optimization, " +"deployment guide, and so on. Any feedback is welcome." +msgstr "本指南旨在帮助用户在系统层面提升 vLLM-Ascend 的性能。内容包括操作系统配置、库优化、部署指南等。欢迎提供任何反馈。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:5 +msgid "Preparation" +msgstr "准备工作" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:7 +msgid "Run the container:" +msgstr "运行容器:" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:31 +msgid "Configure your environment:" +msgstr "配置您的环境:" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:49 +msgid "Install vllm and vllm-ascend:" +msgstr "安装 vllm 和 vllm-ascend:" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:61 +msgid "" +"Please follow the [Installation " +"Guide](https://docs.vllm.ai/projects/ascend/en/latest/installation.html) " +"to make sure vLLM and vllm-ascend are installed correctly." +msgstr "请遵循[安装指南](https://docs.vllm.ai/projects/ascend/en/latest/installation.html)以确保 vLLM 和 vllm-ascend 正确安装。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:64 +msgid "" +"Make sure your vLLM and vllm-ascend are installed after your Python " +"configuration is completed, because these packages will build binary " +"files using python in current environment. If you install vLLM and vllm-" +"ascend before completing section 1.1, the binary files will not use the " +"optimized python." +msgstr "请确保在完成 Python 配置后再安装 vLLM 和 vllm-ascend,因为这些软件包将使用当前环境中的 python 构建二进制文件。如果您在完成第 1.1 节之前就安装了 vLLM 和 vllm-ascend,则二进制文件将不会使用优化后的 python。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:67 +msgid "Optimizations" +msgstr "优化措施" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:69 +msgid "1. Compilation Optimization" +msgstr "1. 编译优化" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:71 +msgid "1.1. Install optimized `python`" +msgstr "1.1. 
安装优化版 `python`" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:73 +msgid "" +"Python supports **LTO** and **PGO** optimization starting from version " +"`3.6` and above, which can be enabled at compile time. And we have " +"offered optimized `python` packages directly to users for the sake of " +"convenience. You can also reproduce the `python` build following this " +"[tutorial](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0063.html)" +" according to your specific scenarios." +msgstr "Python 从 `3.6` 及以上版本开始支持 **LTO** 和 **PGO** 优化,可以在编译时启用。为了方便用户,我们直接提供了优化版的 `python` 软件包。您也可以根据具体场景,按照此[教程](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0063.html)自行构建 `python`。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:101 +msgid "2. OS Optimization" +msgstr "2. 操作系统优化" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:103 +msgid "2.1. jemalloc" +msgstr "2.1. jemalloc" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:105 +msgid "" +"**jemalloc** is a memory allocator that improves performance for multi-" +"threaded scenarios and can reduce memory fragmentation. jemalloc uses a " +"local thread memory manager to allocate variables, which can avoid lock " +"competition between threads and can hugely optimize performance." +msgstr "**jemalloc** 是一个内存分配器,可提升多线程场景下的性能并减少内存碎片。jemalloc 使用本地线程内存管理器来分配变量,这可以避免线程间的锁竞争,从而大幅优化性能。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:117 +msgid "2.2. Tcmalloc" +msgstr "2.2. Tcmalloc" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:119 +msgid "" +"**TCMalloc (Thread Caching Malloc)** is a universal memory allocator that" +" improves overall performance while ensuring low latency by introducing a" +" multi-level cache structure, reducing mutex contention and optimizing " +"large object processing flow. Find more " +"[details](https://www.hiascend.com/document/detail/zh/Pytorch/700/ptmoddevg/trainingmigrguide/performance_tuning_0068.html)." +msgstr "**TCMalloc (Thread Caching Malloc)** 是一个通用内存分配器,通过引入多级缓存结构、减少互斥锁竞争以及优化大对象处理流程,在确保低延迟的同时提升整体性能。更多[详情](https://www.hiascend.com/document/detail/zh/Pytorch/700/ptmoddevg/trainingmigrguide/performance_tuning_0068.html)。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:140 +msgid "3. `torch_npu` Optimization" +msgstr "3. `torch_npu` 优化" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:142 +msgid "" +"Some performance tuning features in `torch_npu` are controlled by " +"environment variables. Some features and their related environment " +"variables are shown below." +msgstr "`torch_npu` 中的一些性能调优功能由环境变量控制。部分功能及其相关环境变量如下所示。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:144 +msgid "Memory optimization:" +msgstr "内存优化:" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:155 +msgid "Scheduling optimization:" +msgstr "调度优化:" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:166 +msgid "4. CANN Optimization" +msgstr "4. CANN 优化" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:168 +msgid "4.1. HCCL Optimization" +msgstr "4.1. 
HCCL 优化" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:170 +msgid "" +"There are some performance tuning features in HCCL, which are controlled " +"by environment variables." +msgstr "HCCL 中有一些性能调优功能,由环境变量控制。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:172 +msgid "" +"You can configure HCCL to use \"AIV\" mode to optimize performance by " +"setting the environment variable shown below. In \"AIV\" mode, the " +"communication is scheduled by AI vector core directly with RoCE, instead " +"of being scheduled by AI CPU." +msgstr "您可以通过设置如下所示的环境变量,将 HCCL 配置为使用 \"AIV\" 模式以优化性能。在 \"AIV\" 模式下,通信由 AI 向量核通过 RoCE 直接调度,而非由 AI CPU 调度。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:179 +msgid "" +"Plus, there are more features for performance optimization in specific " +"scenarios, which are shown below." +msgstr "此外,针对特定场景还有更多性能优化功能,如下所示。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:181 +msgid "" +"`HCCL_INTRA_ROCE_ENABLE`: Use RDMA link instead of SDMA link between two " +"8Ps as the mesh interconnect link. Find more " +"[details](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0044.html)." +msgstr "`HCCL_INTRA_ROCE_ENABLE`:在两个 8P 之间使用 RDMA 链路而非 SDMA 链路作为网状互连链路。更多[详情](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0044.html)。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:182 +msgid "" +"`HCCL_RDMA_TC`: Use this var to configure traffic class of RDMA NIC. Find" +" more " +"[details](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0045.html)." +msgstr "`HCCL_RDMA_TC`:使用此变量配置 RDMA 网卡的流量类别。更多[详情](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0045.html)。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:183 +msgid "" +"`HCCL_RDMA_SL`: Use this var to configure service level of RDMA NIC. Find" +" more " +"[details](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0046.html)." +msgstr "`HCCL_RDMA_SL`:使用此变量配置 RDMA 网卡的服务级别。更多[详情](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0046.html)。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:184 +msgid "" +"`HCCL_BUFFSIZE`: Use this var to control the cache size for sharing data " +"between two NPUs. Find more " +"[details](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0047.html)." +msgstr "`HCCL_BUFFSIZE`:使用此变量控制两个 NPU 之间共享数据的缓存大小。更多[详情](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0047.html)。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:186 +msgid "5. OS Optimization" +msgstr "5. 操作系统优化" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:188 +msgid "" +"This section describes operating system–level optimizations applied on " +"the host machine (bare metal or Kubernetes node) to improve performance " +"stability, latency, and throughput for inference workloads." 
+msgstr "本节描述了在主机(裸机或 Kubernetes 节点)上应用的操作系统级优化,旨在提升推理工作负载的性能稳定性、延迟和吞吐量。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:191 +msgid "" +"These settings must be applied on the host OS and with root privileges. " +"Not inside containers." +msgstr "这些设置必须在主机操作系统上以 root 权限应用,而不是在容器内部。" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:194 +msgid "5.1" +msgstr "5.1" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:196 +msgid "Set CPU Frequency Governor to `performance`" +msgstr "将 CPU 频率调节器设置为 `performance`" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:202 +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:219 +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:239 +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:261 +msgid "Purpose" +msgstr "目的" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:204 +msgid "Forces all CPU cores to run under the `performance` governor" +msgstr "强制所有 CPU 核心在 `performance` 调节器下运行" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:205 +msgid "Disables dynamic frequency scaling (e.g., `ondemand`, `powersave`)" +msgstr "禁用动态频率调节(例如 `ondemand`、`powersave`)" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:207 +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:223 +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:243 +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:265 +msgid "Benefits" +msgstr "优势" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:209 +msgid "Keeps CPU cores at maximum frequency" +msgstr "使 CPU 核心保持最高频率" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:210 +msgid "Reduces latency jitter" +msgstr "减少延迟抖动" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:211 +msgid "Improves predictability for inference workloads" +msgstr "提高推理工作负载的可预测性" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:213 +msgid "5.2 Disable Swap Usage" +msgstr "5.2 禁用交换空间使用" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:221 +msgid "Minimizes the kernel’s tendency to swap memory pages to disk" +msgstr "最小化内核将内存页交换到磁盘的倾向" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:225 +msgid "Prevents severe latency spikes caused by swapping" +msgstr "防止因交换导致的严重延迟峰值" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:226 +msgid "Improves stability for large in-memory models" +msgstr "提高大型内存模型的稳定性" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:228 +msgid "Notes" +msgstr "备注" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:230 +msgid "For inference workloads, swap can introduce second-level latency" +msgstr "对于推理工作负载,交换可能导致秒级延迟" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:231 +msgid "Recommended values are `0` or `1`" +msgstr "推荐值为 `0` 或 `1`" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:233 +msgid "5.3 Disable Automatic NUMA Balancing" +msgstr "5.3 禁用自动 NUMA 平衡" + +#: 
../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:241 +msgid "Disables the kernel’s automatic NUMA page migration mechanism" +msgstr "禁用内核的自动 NUMA 页面迁移机制" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:245 +msgid "Prevents background memory page migrations" +msgstr "防止后台内存页迁移" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:246 +msgid "Reduces unpredictable memory access latency" +msgstr "减少不可预测的内存访问延迟" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:247 +msgid "Improves performance stability on NUMA systems" +msgstr "提高 NUMA 系统上的性能稳定性" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:249 +msgid "Recommended For" +msgstr "推荐用于" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:251 +msgid "Multi-socket servers" +msgstr "多插槽服务器" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:252 +msgid "Ascend / NPU deployments with explicit NUMA binding" +msgstr "具有显式 NUMA 绑定的 Ascend / NPU 部署" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:253 +msgid "Systems with manually managed CPU and memory affinity" +msgstr "手动管理 CPU 和内存亲和性的系统" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:255 +msgid "5.4 Increase Scheduler Migration Cost" +msgstr "5.4 增加调度器迁移成本" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:263 +msgid "Increases the cost for the scheduler to migrate tasks between CPU cores" +msgstr "增加调度器在 CPU 核心间迁移任务的成本" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:267 +msgid "Reduces frequent thread migration" +msgstr "减少频繁的线程迁移" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:268 +msgid "Improves CPU cache locality" +msgstr "提高 CPU 缓存局部性" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:269 +msgid "Lowers latency jitter for inference workloads" +msgstr "降低推理工作负载的延迟抖动" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:271 +msgid "Parameter Details" +msgstr "参数详情" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:273 +msgid "Unit: nanoseconds (ns)" +msgstr "单位:纳秒 (ns)" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:274 +msgid "Typical recommended range: 50000–100000" +msgstr "典型推荐范围:50000–100000" + +#: ../../source/developer_guide/performance_and_debug/optimization_and_tuning.md:275 +msgid "Higher values encourage threads to stay on the same CPU core" +msgstr "更高的值鼓励线程保持在同一个 CPU 核心上" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po index 3c119556..82a432bd 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po @@ -4,85 +4,338 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:1 +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:1 msgid "Performance Benchmark" -msgstr "性能基准" +msgstr "性能基准测试" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:2 +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:3 msgid "" -"This document details the benchmark methodology for vllm-ascend, aimed at " -"evaluating the performance under a variety of workloads. To maintain " +"This document details the benchmark methodology for vllm-ascend, aimed at" +" evaluating the performance under a variety of workloads. To maintain " "alignment with vLLM, we use the [benchmark](https://github.com/vllm-" "project/vllm/tree/main/benchmarks) script provided by the vllm project." msgstr "" -"本文档详细说明了 vllm-ascend 的基准测试方法,旨在评估其在多种工作负载下的性能。为了与 vLLM 保持一致,我们使用 vllm 项目提供的 " -"[benchmark](https://github.com/vllm-project/vllm/tree/main/benchmarks) 脚本。" +"本文档详细说明了 vllm-ascend 的基准测试方法,旨在评估其在多种工作负载下的性能。为了与 vLLM 保持一致,我们使用 vllm " +"项目提供的 [benchmark](https://github.com/vllm-" +"project/vllm/tree/main/benchmarks) 脚本。" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:4 +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:5 msgid "" -"**Benchmark Coverage**: We measure offline e2e latency and throughput, and " -"fixed-QPS online serving benchmarks, for more details see [vllm-ascend " -"benchmark scripts](https://github.com/vllm-project/vllm-" +"**Benchmark Coverage**: We measure offline E2E latency and throughput, " +"and fixed-QPS online serving benchmarks. For more details, see [vllm-" +"ascend benchmark scripts](https://github.com/vllm-project/vllm-" "ascend/tree/main/benchmarks)." msgstr "" "**基准测试覆盖范围**:我们测量离线端到端延迟和吞吐量,以及固定 QPS 的在线服务基准测试。更多详情请参见 [vllm-ascend " -"基准测试脚本](https://github.com/vllm-project/vllm-ascend/tree/main/benchmarks)。" +"基准测试脚本](https://github.com/vllm-project/vllm-" +"ascend/tree/main/benchmarks)。" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:6 +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:7 +msgid "**Legend Description**:" +msgstr "**图例说明**:" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:9 +msgid "✅ = Supported" +msgstr "✅ = 已支持" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:10 +msgid "🟡 = Partial / Work in progress" +msgstr "🟡 = 部分支持 / 开发中" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:11 +msgid "🚧 = Under development" +msgstr "🚧 = 开发中" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:13 msgid "1. Run docker container" -msgstr "1. 运行 docker 容器" +msgstr "1. 
运行 Docker 容器" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:31 +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:39 msgid "2. Install dependencies" msgstr "2. 安装依赖项" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:38 -msgid "3. (Optional)Prepare model weights" -msgstr "3.(可选)准备模型权重" +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:47 +msgid "3. Run basic benchmarks" +msgstr "3. 运行基础基准测试" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:39 +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:49 msgid "" -"For faster running speed, we recommend downloading the model in advance:" -msgstr "为了更快的运行速度,建议提前下载模型:" +"This section introduces how to perform performance testing using the " +"benchmark suite built into VLLM." +msgstr "本节介绍如何使用 VLLM 内置的基准测试套件进行性能测试。" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:44 +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:51 +msgid "3.1 Dataset" +msgstr "3.1 数据集" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:53 msgid "" -"You can also replace all model paths in the [json](https://github.com/vllm-" -"project/vllm-ascend/tree/main/benchmarks/tests) files with your local paths:" +"VLLM supports a variety of [datasets](https://github.com/vllm-" +"project/vllm/blob/main/vllm/benchmarks/datasets.py)." +msgstr "VLLM 支持多种[数据集](https://github.com/vllm-project/vllm/blob/main/vllm/benchmarks/datasets.py)。" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Dataset" +msgstr "数据集" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Online" +msgstr "在线" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Offline" +msgstr "离线" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Data Path" +msgstr "数据路径" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "ShareGPT" +msgstr "ShareGPT" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "✅" +msgstr "✅" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "" +"`wget " +"https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json`" msgstr "" -"你也可以将 [json](https://github.com/vllm-project/vllm-" -"ascend/tree/main/benchmarks/tests) 文件中的所有模型路径替换为你的本地路径:" +"`wget " +"https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json`" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:60 -msgid "4. Run benchmark script" -msgstr "4. 
运行基准测试脚本" +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "ShareGPT4V (Image)" +msgstr "ShareGPT4V (图像)" -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:61 -msgid "Run benchmark script:" -msgstr "运行基准测试脚本:" - -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:66 -msgid "After about 10 mins, the output is as shown below:" -msgstr "大约 10 分钟后,输出如下所示:" - -#: ../../developer_guide/performance_and_debug/performance_benchmark.md:176 +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 msgid "" -"The result json files are generated into the path `benchmark/results` These " -"files contain detailed benchmarking results for further analysis." -msgstr "结果 json 文件会生成到路径 `benchmark/results`。这些文件包含了用于进一步分析的详细基准测试结果。" +"`wget https://huggingface.co/datasets/Lin-" +"Chen/ShareGPT4V/resolve/main/sharegpt4v_instruct_gpt4-vision_cap100k.json`
Note" +" that the images need to be downloaded separately. For example, to " +"download COCO's 2017 Train images:
`wget " +"http://images.cocodataset.org/zips/train2017.zip`" +msgstr "" +"`wget https://huggingface.co/datasets/Lin-" +"Chen/ShareGPT4V/resolve/main/sharegpt4v_instruct_gpt4-vision_cap100k.json`
请注意,图像需要单独下载。例如,要下载" +" COCO 2017 训练集图像:
`wget " +"http://images.cocodataset.org/zips/train2017.zip`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "ShareGPT4Video (Video)" +msgstr "ShareGPT4Video (视频)" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "`git clone https://huggingface.co/datasets/ShareGPT4Video/ShareGPT4Video`" +msgstr "`git clone https://huggingface.co/datasets/ShareGPT4Video/ShareGPT4Video`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "BurstGPT" +msgstr "BurstGPT" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "" +"`wget " +"https://github.com/HPMLL/BurstGPT/releases/download/v1.1/BurstGPT_without_fails_2.csv`" +msgstr "" +"`wget " +"https://github.com/HPMLL/BurstGPT/releases/download/v1.1/BurstGPT_without_fails_2.csv`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Sonnet (deprecated)" +msgstr "Sonnet (已弃用)" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Local file: `benchmarks/sonnet.txt`" +msgstr "本地文件:`benchmarks/sonnet.txt`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Random" +msgstr "随机" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "`synthetic`" +msgstr "`synthetic`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "RandomMultiModal (Image/Video)" +msgstr "RandomMultiModal (图像/视频)" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "🟡" +msgstr "🟡" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "🚧" +msgstr "🚧" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "RandomForReranking" +msgstr "RandomForReranking" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Prefix Repetition" +msgstr "前缀重复" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "HuggingFace-VisionArena" +msgstr "HuggingFace-VisionArena" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "`lmarena-ai/VisionArena-Chat`" +msgstr "`lmarena-ai/VisionArena-Chat`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "HuggingFace-MMVU" +msgstr "HuggingFace-MMVU" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "`yale-nlp/MMVU`" +msgstr "`yale-nlp/MMVU`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "HuggingFace-InstructCoder" +msgstr "HuggingFace-InstructCoder" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "`likaixin/InstructCoder`" +msgstr "`likaixin/InstructCoder`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "HuggingFace-AIMO" +msgstr "HuggingFace-AIMO" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "" +"`AI-MO/aimo-validation-aime`, `AI-MO/NuminaMath-1.5`, `AI-MO/NuminaMath-" +"CoT`" +msgstr "`AI-MO/aimo-validation-aime`, `AI-MO/NuminaMath-1.5`, `AI-MO/NuminaMath-CoT`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "HuggingFace-Other" +msgstr "HuggingFace-其他" + +#: 
../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "`lmms-lab/LLaVA-OneVision-Data`, `Aeala/ShareGPT_Vicuna_unfiltered`" +msgstr "`lmms-lab/LLaVA-OneVision-Data`, `Aeala/ShareGPT_Vicuna_unfiltered`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "HuggingFace-MTBench" +msgstr "HuggingFace-MTBench" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "`philschmid/mt-bench`" +msgstr "`philschmid/mt-bench`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "HuggingFace-Blazedit" +msgstr "HuggingFace-Blazedit" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "`vdaita/edit_5k_char`, `vdaita/edit_10k_char`" +msgstr "`vdaita/edit_5k_char`, `vdaita/edit_10k_char`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Spec Bench" +msgstr "Spec Bench" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "" +"`wget https://raw.githubusercontent.com/hemingkx/Spec-" +"Bench/refs/heads/main/data/spec_bench/question.jsonl`" +msgstr "`wget https://raw.githubusercontent.com/hemingkx/Spec-Bench/refs/heads/main/data/spec_bench/question.jsonl`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Custom" +msgstr "自定义" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:15 +msgid "Local file: `data.jsonl`" +msgstr "本地文件:`data.jsonl`" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:83 +msgid "" +"The datasets mentioned above are all links to datasets on huggingface. " +"The dataset's `dataset-name` should be set to `hf`. For local `dataset-" +"path`, please set `hf-name` to its Hugging Face ID like" +msgstr "上述提到的数据集均为 Hugging Face 上数据集的链接。数据集的 `dataset-name` 应设置为 `hf`。对于本地的 `dataset-path`,请将 `hf-name` 设置为其 Hugging Face ID,例如:" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:93 +msgid "3.2 Run basic benchmark" +msgstr "3.2 运行基础基准测试" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:95 +msgid "3.2.1 Online serving" +msgstr "3.2.1 在线服务" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:97 +msgid "First start serving your model:" +msgstr "首先启动模型服务:" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:103 +msgid "Then run the benchmarking script:" +msgstr "然后运行基准测试脚本:" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:118 +msgid "If successful, you will see the following output:" +msgstr "如果成功,您将看到以下输出:" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:147 +msgid "3.2.2 Offline Throughput Benchmark" +msgstr "3.2.2 离线吞吐量基准测试" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:158 +msgid "If successful, you will see the following output" +msgstr "如果成功,您将看到以下输出" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:167 +msgid "3.2.4 Multi-Modal Benchmark" +msgstr "3.2.4 多模态基准测试" + +#: ../../source/developer_guide/performance_and_debug/performance_benchmark.md:216 +msgid "3.2.5 Embedding Benchmark" +msgstr "3.2.5 嵌入基准测试" + +#~ msgid "3. 
(Optional)Prepare model weights" +#~ msgstr "3.(可选)准备模型权重" + +#~ msgid "" +#~ "For faster running speed, we recommend" +#~ " downloading the model in advance:" +#~ msgstr "为了获得更快的运行速度,我们建议提前下载模型:" + +#~ msgid "" +#~ "You can also replace all model " +#~ "paths in the [json](https://github.com/vllm-" +#~ "project/vllm-ascend/tree/main/benchmarks/tests) files " +#~ "with your local paths:" +#~ msgstr "" +#~ "您也可以将 [json](https://github.com/vllm-project/vllm-" +#~ "ascend/tree/main/benchmarks/tests) 文件中的所有模型路径替换为您的本地路径:" + +#~ msgid "After about 10 mins, the output is as shown below:" +#~ msgstr "大约 10 分钟后,输出如下所示:" + +#~ msgid "" +#~ "The result json files are generated " +#~ "into the path `benchmark/results` These " +#~ "files contain detailed benchmarking results" +#~ " for further analysis." +#~ msgstr "结果 JSON 文件将生成到路径 `benchmark/results`。这些文件包含详细的基准测试结果,可用于进一步分析。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po index ca57e0a6..c5cc7740 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po @@ -4,572 +4,880 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-10-31 00:00+0000\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:1 msgid "Service Profiling Guide" -msgstr "服务化性能采集指南" +msgstr "服务性能剖析指南" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "In inference service processes, we sometimes need to monitor the internal execution flow of the inference service framework to identify performance issues. By collecting start and end timestamps of key processes, identifying critical functions or iterations, recording key events, and capturing diverse types of information, we can quickly pinpoint performance bottlenecks." -msgstr "在推理服务过程中,我们有时需要监控推理服务框架的内部执行流程以定位性能问题。通过采集关键流程的起止时间、识别关键函数或迭代、记录关键事件并捕获多种类型的信息,可以快速定位性能瓶颈。" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:3 msgid "" -"This guide walks you through collecting performance data for the vllm-ascend " -"service framework and operators. It covers the full workflow from preparation " -"and collection to analysis and visualization, helping you quickly get started " -"with the profiling tool." -msgstr "" -"本部分将指导你如何采集 vllm-ascend 的服务化框架性能数据以及算子性能数据,覆盖从准备、采集、解析到结果展示的完整流程,帮助你快速上手性能采集工具。" +"In an inference service process, it is sometimes necessary to monitor the" +" internal execution flow of the inference service framework to identify " +"performance issues. 
By collecting start and end timestamps of key " +"processes, identifying key functions or iterations, recording critical " +"events, and gathering various types of information, performance " +"bottlenecks can be quickly located." +msgstr "在推理服务过程中,有时需要监控推理服务框架的内部执行流程以定位性能问题。通过采集关键流程的起止时间戳、识别关键函数或迭代、记录关键事件并收集各类信息,可以快速定位性能瓶颈。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Quick Start" -msgstr "快速开始" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:5 +msgid "" +"This guide will walk you through the process of collecting performance " +"data from the vLLM-Ascend service framework and operators. It covers the " +"complete workflow from preparation, collection, analysis, to " +"visualization, helping you quickly get started with performance " +"collection tools." +msgstr "本指南将引导您完成从 vLLM-Ascend 服务框架和算子收集性能数据的过程。它涵盖了从准备、采集、分析到可视化的完整工作流程,帮助您快速上手性能采集工具。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "0 Installation" -msgstr "0 安装" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:7 +msgid "" +"Two performance collection solutions are provided below: Ascend PyTorch " +"Profiler and MS Service Profiler. You can choose the appropriate tool for" +" performance analysis and troubleshooting based on your actual " +"requirements." +msgstr "以下提供两种性能采集方案:Ascend PyTorch Profiler 和 MS Service Profiler。您可以根据实际需求选择合适的工具进行性能分析和问题排查。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:9 +msgid "Solution Comparison" +msgstr "方案对比" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Feature" +msgstr "特性" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:27 +msgid "Ascend PyTorch Profiler" +msgstr "Ascend PyTorch Profiler" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:118 +msgid "MS Service Profiler" +msgstr "MS Service Profiler" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Installation Method" +msgstr "安装方式" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Built-in, no additional installation required" +msgstr "内置,无需额外安装" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Requires pip installation of msserviceprofiler" +msgstr "需要通过 pip 安装 msserviceprofiler" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Collection Granularity" +msgstr "采集粒度" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "PyTorch operator level" +msgstr "PyTorch 算子级别" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Service framework function level" +msgstr "服务框架函数级别" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Control Method" +msgstr "控制方式" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "API request control" +msgstr "API 请求控制" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Configuration file control" +msgstr "配置文件控制" + +#: 
../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Applicable Scenarios" +msgstr "适用场景" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Model operator performance analysis" +msgstr "模型算子性能分析" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Service framework workflow analysis" +msgstr "服务框架工作流分析" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Data Format" +msgstr "数据格式" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "ascend_pt format" +msgstr "ascend_pt 格式" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Chrome Tracing + CSV" +msgstr "Chrome Tracing + CSV" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Main Advantage" +msgstr "主要优势" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Operator-level performance analysis" +msgstr "算子级性能分析" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Service framework workflow visualization" +msgstr "服务框架工作流可视化" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:20 +msgid "Quick Selection Guide" +msgstr "快速选择指南" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:22 +msgid "" +"[**Model Operator Performance** → Use Ascend PyTorch Profiler](#ascend-" +"pytorch-profiler)" +msgstr "[**模型算子性能** → 使用 Ascend PyTorch Profiler](#ascend-pytorch-profiler)" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:23 +msgid "" +"[**Service Framework Workflow** → Use MS Service Profiler](#ms-service-" +"profiler)" +msgstr "[**服务框架工作流** → 使用 MS Service Profiler](#ms-service-profiler)" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:29 +msgid "0. Installation and Configuration" +msgstr "0. 安装与配置" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:31 +msgid "" +"No additional packages need to be installed; it can be enabled through " +"command-line configuration. Currently, vLLM enables **python stack** by " +"default, which can significantly inflate the collected performance data. " +"If you do not wish to collect **python stack**, you can disable it using " +"`torch_profiler_with_stack=false`." +msgstr "无需安装额外包;可通过命令行配置启用。目前 vLLM 默认启用 **python 调用栈**,这会显著增加采集的性能数据量。如果您不希望采集 **python 调用栈**,可以使用 `torch_profiler_with_stack=false` 禁用它。" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:33 +msgid "1. Preparation for Collection" +msgstr "1. 采集准备" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:35 +msgid "" +"Start the online service and set the `--profiler-config` parameter to " +"control the path for saving performance files. After the parameter is " +"set, the collection function is enabled." 
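Once the service is up, collection is toggled at runtime through the profiling endpoints described in section 2 below. A minimal sketch, assuming the server listens on the default `localhost:8000` (adjust host and port to your deployment):

```bash
# Start collection, send normal business requests, then stop collection.
curl -X POST http://localhost:8000/start_profile   # begin profiling

# ... send your usual inference requests here ...

curl -X POST http://localhost:8000/stop_profile    # stop profiling; data is written
                                                    # to the directory configured via
                                                    # --profiler-config
```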
+msgstr "启动在线服务,并设置 `--profiler-config` 参数来控制性能文件的保存路径。参数设置后,采集功能即被启用。" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:50 +msgid "" +"Note:**January 19, 2026: The vLLM mainline has deprecated the " +"VLLM_TORCH_PROFILER_DIR environment variable.**[Related " +"PR](https://github.com/vllm-project/vllm-ascend/pull/5928) When using " +"the vLLM Ascend mainline code to collect profiler data, remember to use " +"the `--profiler-config` (online) parameter or the `profiler_config` " +"(offline) parameter." +msgstr "注意:**2026年1月19日:vLLM 主线已弃用 VLLM_TORCH_PROFILER_DIR 环境变量。**[相关 PR](https://github.com/vllm-project/vllm-ascend/pull/5928) 使用 vLLM Ascend 主线代码收集性能剖析器数据时,请记住使用 `--profiler-config`(在线)参数或 `profiler_config`(离线)参数。" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:52 +msgid "2. Start Collection" +msgstr "2. 开始采集" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:54 +msgid "" +"Performance collection is controlled by sending API requests. You can " +"start collection after stabilizing the actual business data and collect " +"profiling for a few seconds before stopping; or you can start collection " +"first, then send business requests, and finally stop." +msgstr "性能采集通过发送 API 请求来控制。您可以在实际业务数据稳定后开始采集,采集几秒后停止;或者可以先开始采集,然后发送业务请求,最后停止。" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:56 +msgid "Send the following request to start the profiling service:" +msgstr "发送以下请求以启动性能剖析服务:" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:62 +msgid "Send the following request to stop the profiling service:" +msgstr "发送以下请求以停止性能剖析服务:" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:68 +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:154 +msgid "3. Send Requests" +msgstr "3. 发送请求" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:70 +msgid "" +"Send requests according to your actual business data. After sending the " +"requests, stop the profiling service, and the data will be automatically " +"saved to the previously configured path:" +msgstr "根据您的实际业务数据发送请求。发送请求后,停止性能剖析服务,数据将自动保存到先前配置的路径:" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:85 +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:169 +msgid "4. Analyze Data" +msgstr "4. 分析数据" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:87 +msgid "" +"Navigate to the `./vllm_profile` directory and locate the generated " +"`*ascend_pt` folder. This folder needs to be analyzed before profiling " +"data can be examined." +msgstr "导航到 `./vllm_profile` 目录并找到生成的 `*ascend_pt` 文件夹。在检查性能剖析数据之前,需要先分析此文件夹。" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:94 +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:179 +msgid "5. View Results" +msgstr "5. 查看结果" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:96 +msgid "" +"After analysis, the `*ascend_pt` directory will contain many files, with " +"the main analysis focus being the `ASCEND_PROFILER_OUTPUT` folder. 
This " +"directory will include the following files:" +msgstr "分析后,`*ascend_pt` 目录将包含许多文件,主要分析重点是 `ASCEND_PROFILER_OUTPUT` 文件夹。此目录将包含以下文件:" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:98 +msgid "`analysis.db`: Performance data in database format" +msgstr "`analysis.db`:数据库格式的性能数据" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:100 +msgid "`api_statistic.csv`: API call statistics" +msgstr "`api_statistic.csv`:API 调用统计" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:102 +msgid "`ascend_pytorch_profiler_0.db`: Performance data in database format" +msgstr "`ascend_pytorch_profiler_0.db`:数据库格式的性能数据" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:104 +msgid "`kernel_details.csv`: Kernel-level related data" +msgstr "`kernel_details.csv`:内核级相关数据" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:106 +msgid "`operator_details.csv`: Operator-level related data" +msgstr "`operator_details.csv`:算子级相关数据" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:108 +msgid "`op_statistic.csv`: Operator utilization data" +msgstr "`op_statistic.csv`:算子利用率数据" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:110 +msgid "`step_trace_time.csv`: Scheduling data" +msgstr "`step_trace_time.csv`:调度数据" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:112 +msgid "" +"`trace_view.json`: Chrome tracing format data, can be opened with " +"[MindStudio " +"Insight](https://www.hiascend.com/document/detail/zh/mindstudio/81RC1/GUI_baseddevelopmenttool/msascendinsightug/Insight_userguide_0002.html)" +msgstr "`trace_view.json`:Chrome 追踪格式数据,可在 [MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/81RC1/GUI_baseddevelopmenttool/msascendinsightug/Insight_userguide_0002.html) 中打开" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:114 +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:370 +msgid "[↑ Back to Top](#service-profiling-guide)" +msgstr "[↑ 返回顶部](#service-profiling-guide)" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:120 +msgid "0. Installation" +msgstr "0. 安装" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:122 msgid "Install the `msserviceprofiler` package using pip:" msgstr "使用 pip 安装 `msserviceprofiler` 包:" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "1 Preparation" -msgstr "1 准备采集" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:128 +msgid "1. Preparation" +msgstr "1. 准备" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:130 msgid "" "Before starting the service, set the environment variable " "`SERVICE_PROF_CONFIG_PATH` to point to the profiling configuration file, " -"and set the environment variable `PROFILING_SYMBOLS_PATH` to specify the YAML " -"configuration file for the symbols that need to be imported. After that, start " -"the vLLM service according to your deployment method." 
-msgstr "" -"在启动服务之前,请设置环境变量`SERVICE_PROF_CONFIG_PATH`指定需要加载的性能分析配置文件,并设置环境变量`PROFILING_SYMBOLS_PATH`来指定需要导入的符号的 YAML 配置文件。之后,根据您的部署方式启动 vLLM 服务。" +"and set the environment variable `PROFILING_SYMBOLS_PATH` to specify the " +"YAML configuration file for the symbols that need to be imported. After " +"that, start the vLLM service according to your deployment method." +msgstr "在启动服务之前,请设置环境变量 `SERVICE_PROF_CONFIG_PATH` 指向性能剖析配置文件,并设置环境变量 `PROFILING_SYMBOLS_PATH` 来指定需要导入的符号的 YAML 配置文件。之后,根据您的部署方式启动 vLLM 服务。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "cd ${path_to_store_profiling_files}" -msgstr "cd ${profiling 文件存放路径}" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Set environment variable" -msgstr "设置环境变量" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Start vLLM service" -msgstr "启动 vLLM 服务" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:142 msgid "" -"The file `ms_service_profiler_config.json` is the profiling configuration. " -"If it does not exist at the specified path, a default configuration will be " -"generated automatically. If needed, you can customize it in advance according " -"to the instructions in the `Profiling Configuration File` section below." -msgstr "" -"其中 `ms_service_profiler_config.json` 为采集配置文件。若指定路径下不存在该文件,将自动生成一份默认配置。若有需要,可参照 `采集配置文件说明` 章节提前进行自定义配置。" +"The file `ms_service_profiler_config.json` is the profiling " +"configuration. If it does not exist at the specified path, a default " +"configuration will be generated automatically. If needed, you can " +"customize it in advance according to the instructions in the `Profiling " +"Configuration File` section below." +msgstr "其中 `ms_service_profiler_config.json` 为性能剖析配置文件。若指定路径下不存在该文件,将自动生成一份默认配置。若有需要,可参照下文 `性能剖析配置文件` 章节提前进行自定义配置。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:144 msgid "" -"`service_profiling_symbols.yaml` is the configuration file containing " -"the profiling points to be imported. You can choose **not** to set the " +"`service_profiling_symbols.yaml` is the configuration file containing the" +" profiling points to be imported. You can choose **not** to set the " "`PROFILING_SYMBOLS_PATH` environment variable, in which case the default " "configuration file will be used. If the file does not exist at the path " -"you specified, likewise, the system will generate a configuration file at " -"your specified path for future configuration. You can customize it according " -"to the instructions in the `Symbols Configuration File` section below." -msgstr "`service_profiling_symbols.yaml` 为需要导入的埋点配置文件。你也可以选择不设置环境变量 `PROFILING_SYMBOLS_PATH`,此时将使用默认的配置文件;若你指定的路径下不存在该文件,系统同样会在你指定的路径生成一份配置文件以便后续修改。可参考 `点位配置文件说明` 一节进行自定义。" +"you specified, likewise, the system will generate a configuration file at" +" your specified path for future configuration. You can customize it " +"according to the instructions in the `Symbols Configuration File` section" +" below." 
+msgstr "" +"`service_profiling_symbols.yaml` 为需要导入的埋点配置文件。你也可以选择不设置环境变量 " +"`PROFILING_SYMBOLS_PATH`,此时将使用默认的配置文件;若你指定的路径下不存在该文件,系统同样会在你指定的路径生成一份配置文件以便后续修改。可参考" +" `点位配置文件说明` 一节进行自定义。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "2 Enable Profiling" -msgstr "2 开启采集" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:146 +msgid "2. Enable Profiling" +msgstr "2. 开启性能数据采集" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:148 msgid "" -"To enable the performance data collection switch, change the `enable` field from " -"`0` to `1` in the configuration file `ms_service_profiler_config.json`. This can " -"be accomplished by executing the following sed command:" -msgstr "将配置文件`ms_service_profiler_config.json`中的 `enable` 字段由 `0` 修改为 `1`,即可开启性能数据采集的开关,可以通过执行下面sed指令完成采集服务的开启:" +"To enable the performance data collection switch, change the `enable` " +"field from `0` to `1` in the configuration file " +"`ms_service_profiler_config.json`. This can be accomplished by executing " +"the following sed command:" +msgstr "" +"将配置文件`ms_service_profiler_config.json`中的 `enable` 字段由 `0` 修改为 " +"`1`,即可开启性能数据采集的开关,可以通过执行下面sed指令完成采集服务的开启:" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "3 Send Requests" -msgstr "3 发送请求" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "" -"Choose a request-sending method that suits your actual profiling needs:" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:156 +msgid "Choose a request-sending method that suits your actual profiling needs:" msgstr "根据实际采集需求选择请求发送方式:" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "4 Analyze Data" -msgstr "4 解析数据" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "xxxx-xxxx is the directory automatically created based on vLLM startup time" -msgstr "xxxx-xxxx 为采集工具根据 vLLM 启动时间自动创建的存放目录" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Analyze data" -msgstr "解析数据" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "5 View Results" -msgstr "5 查看结果" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:181 msgid "After analysis, the `output` directory will contain:" msgstr "解析完成后,`output` 目录下会生成:" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:183 msgid "" -"`chrome_tracing.json`: Chrome tracing format data, which can be opened in " -"[MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/81RC1/GUI_baseddevelopmenttool/msascendinsightug/Insight_userguide_0002.html)." +"`chrome_tracing.json`: Chrome tracing format data, which can be opened in" +" [MindStudio " +"Insight](https://www.hiascend.com/document/detail/zh/mindstudio/81RC1/GUI_baseddevelopmenttool/msascendinsightug/Insight_userguide_0002.html)." 
msgstr "" -"`chrome_tracing.json`:Chrome 追踪格式数据,可在 [MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/81RC1/GUI_baseddevelopmenttool/msascendinsightug/Insight_userguide_0002.html) 中打开。" +"`chrome_tracing.json`:Chrome 追踪格式数据,可在 [MindStudio " +"Insight](https://www.hiascend.com/document/detail/zh/mindstudio/81RC1/GUI_baseddevelopmenttool/msascendinsightug/Insight_userguide_0002.html)" +" 中打开。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:184 msgid "`profiler.db`: Performance data in database format." msgstr "`profiler.db`:数据库格式的性能数据。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:185 msgid "`request.csv`: Request-related data." msgstr "`request.csv`:请求相关数据。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:186 msgid "`request_summary.csv`: Overall request metrics." msgstr "`request_summary.csv`:请求总体统计指标。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:187 msgid "`kvcache.csv`: KV Cache-related data." msgstr "`kvcache.csv`:KV Cache 相关数据。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:188 msgid "`batch.csv`: Batch scheduling-related data." msgstr "`batch.csv`:批次调度相关数据。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:189 msgid "`batch_summary.csv`: Overall batch scheduling metrics." msgstr "`batch_summary.csv`:批次调度总体统计指标。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:190 msgid "`service_summary.csv`: Overall service-level metrics." msgstr "`service_summary.csv`:服务化维度总体统计指标。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Appendix" -msgstr "附录" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:194 +msgid "6. Appendix related to MS Service Profiler" +msgstr "6. MS Service Profiler 相关附录" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "1 Profiling Configuration File" -msgstr "1 采集配置文件说明" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:198 +msgid "6.1 Profiling Configuration File" +msgstr "6.1 采集配置文件说明" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "The profiling configuration file controls profiling parameters and behavior." +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:200 +msgid "" +"The profiling configuration file controls profiling parameters and " +"behavior." msgstr "采集配置文件用于控制性能数据采集的参数与行为。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:202 msgid "File Format" msgstr "配置文件格式" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:204 msgid "The configuration is in JSON format. 
Main parameters:" msgstr "配置文件为 JSON 格式,主要参数如下:" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Parameter" msgstr "参数" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Description" msgstr "说明" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Required" msgstr "是否必选" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "enable" msgstr "enable" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Switch for profiling:
0: disable
1: enable
Default: 0" msgstr "是否开启性能数据采集的开关:
0:关闭
1:开启
默认值:0" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Yes" msgstr "是" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "prof_dir" msgstr "prof_dir" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Directory to store collected performance data.
Default: $HOME/.ms_service_profiler" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Directory to store collected performance data.
Default: " +"$HOME/.ms_service_profiler" msgstr "采集到性能数据的存放路径,支持用户自定义。
默认值:$HOME/.ms_service_profiler" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "No" msgstr "否" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "profiler_level" msgstr "profiler_level" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Data collection level. Default is \"INFO\" (normal level)." msgstr "数据采集等级。默认值为\"INFO\",指普通级别的性能数据。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "host_system_usage_freq" msgstr "host_system_usage_freq" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Sampling frequency of host CPU and memory metrics. Disabled by default. Range: integer 1–50, unit: Hz (times per second). Set to -1 to disable.
Note: Enabling this may consume significant memory." -msgstr "CPU和内存系统指标采集频率,默认关闭不采集。范围整数1~50,单位hz,表示每秒采集的次数。设置为-1时关闭采集该指标。
说明:开启该功能可能占用较大内存" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Sampling frequency of host CPU and memory metrics. Disabled by default. " +"Range: integer 1–50, unit: Hz (times per second). Set to -1 to disable. " +"
Note: Enabling this may consume significant memory." +msgstr "" +"CPU和内存系统指标采集频率,默认关闭不采集。范围整数1~50,单位hz,表示每秒采集的次数。设置为-1时关闭采集该指标。
说明:开启该功能可能占用较大内存" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "npu_memory_usage_freq" msgstr "npu_memory_usage_freq" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Sampling frequency of NPU memory utilization. Disabled by default. Range: integer 1–50, unit: Hz (times per second). Set to -1 to disable.
Note: Enabling this may consume significant memory." -msgstr "NPU Memory使用率指标的采集频率,默认关闭不采集。范围整数1~50,单位hz,表示每秒采集的次数。设置为-1时关闭采集该指标。
说明:开启该功能可能占用较大内存" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Sampling frequency of NPU memory utilization. Disabled by default. Range:" +" integer 1–50, unit: Hz (times per second). Set to -1 to disable.
Note: Enabling this may consume significant memory." +msgstr "" +"NPU Memory使用率指标的采集频率,默认关闭不采集。范围整数1~50,单位hz,表示每秒采集的次数。设置为-1时关闭采集该指标。
说明:开启该功能可能占用较大内存" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "acl_task_time" msgstr "acl_task_time" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Switch to collect operator dispatch latency and execution latency:
0: disable (default; 0 or invalid values mean disabled).
1: enable; calls `aclprofCreateConfig` with `ACL_PROF_TASK_TIME_L0`.
2: enable MSPTI-based data dumping; uses MSPTI for profiling and requires: `export LD_PRELOAD=$ASCEND_TOOLKIT_HOME/lib64/libmspti.so`" -msgstr "开启采集算子下发耗时、算子执行耗时数据的开关,取值为:
0:关闭。默认值,配置为0或其他非法值均表示关闭。
1:开启。该功能开启时调用aclprofCreateConfig接口的ACL_PROF_TASK_TIME_L0参数。
2:开启基于MSPTI接口的数据落盘。该功能开启时调用MSPTI接口进行性能数据采集,需要配置如下环境变量:export LD_PRELOAD=$ASCEND_TOOLKIT_HOME/lib64/libmspti.so" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Switch to collect operator dispatch latency and execution latency:
0: disable (default; 0 or invalid values mean disabled).
1: " +"enable; calls `aclprofCreateConfig` with `ACL_PROF_TASK_TIME_L0`.
2:" +" enable MSPTI-based data dumping; uses MSPTI for profiling and requires: " +"`export LD_PRELOAD=$ASCEND_TOOLKIT_HOME/lib64/libmspti.so`" +msgstr "" +"开启采集算子下发耗时、算子执行耗时数据的开关,取值为:
0:关闭。默认值,配置为0或其他非法值均表示关闭。
1:开启。该功能开启时调用aclprofCreateConfig接口的ACL_PROF_TASK_TIME_L0参数。
2:开启基于MSPTI接口的数据落盘。该功能开启时调用MSPTI接口进行性能数据采集,需要配置如下环境变量:export " +"LD_PRELOAD=$ASCEND_TOOLKIT_HOME/lib64/libmspti.so" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "acl_prof_task_time_level" msgstr "acl_prof_task_time_level" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Level and duration for profiling:
L0: collect operator dispatch and execution latency only; lower overhead (no operator basic info).
L1: collect AscendCL interface performance (host–device and inter-device sync/async memory copy latencies), plus operator dispatch, execution, and basic info for comprehensive analysis.
time: profiling duration, integer 1–999, in seconds.
If unset, defaults to L0 until program exit; invalid values fall back to defaults.
Level and duration can be combined, e.g., `\"acl_prof_task_time_level\": \"L1,10\"`." -msgstr "设置性能数据采集的Level等级和时长,取值为:
L0:Level0等级,表示采集算子下发耗时、算子执行耗时数据。与L1相比,由于不采集算子基本信息数据,采集时性能开销较小,可更精准统计相关耗时数据。
L1:Level1等级,采集AscendCL接口的性能数据,包括Host与Device之间、Device间的同步异步内存复制时延;采集算子下发耗时、算子执行耗时数据以及算子基本信息数据,提供更全面的性能分析数据。
time:采集时长,取值范围为1~999的正整数,单位s。
默认未配置本参数,表示采集L0数据,且采集到程序执行结束。配置其他非法值时取默认值。
采集的Level等级和时长可同时配置,例如\"acl_prof_task_time_level\": \"L1,10\"。" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Level and duration for profiling:
L0: collect operator dispatch and" +" execution latency only; lower overhead (no operator basic info).
L1: collect AscendCL interface performance (host–device and inter-" +"device sync/async memory copy latencies), plus operator dispatch, " +"execution, and basic info for comprehensive analysis.
time: " +"profiling duration, integer 1–999, in seconds.
If unset, defaults to" +" L0 until program exit; invalid values fall back to defaults.
Level " +"and duration can be combined, e.g., `\"acl_prof_task_time_level\": " +"\"L1,10\"`." +msgstr "" +"设置性能数据采集的Level等级和时长,取值为:
L0:Level0等级,表示采集算子下发耗时、算子执行耗时数据。与L1相比,由于不采集算子基本信息数据,采集时性能开销较小,可更精准统计相关耗时数据。
L1:Level1等级,采集AscendCL接口的性能数据,包括Host与Device之间、Device间的同步异步内存复制时延;采集算子下发耗时、算子执行耗时数据以及算子基本信息数据,提供更全面的性能分析数据。
time:采集时长,取值范围为1~999的正整数,单位s。
默认未配置本参数,表示采集L0数据,且采集到程序执行结束。配置其他非法值时取默认值。
采集的Level等级和时长可同时配置,例如\"acl_prof_task_time_level\": \"L1,10\"。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "api_filter" msgstr "api_filter" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Filter to select API performance data to dump. For example, specifying \"matmul\" dumps all API data whose `name` contains \"matmul\". String, case-sensitive; use \";\" to separate multiple targets. Empty means dump all.
Effective only when `acl_task_time` is 2." -msgstr "对性能数据进行过滤,配置该参数可自定义采集配置的API性能数据,例如传入\"matmul\"会落盘所有API数据中name字段包含matmul的性能数据。str类型,区分大小写,多个不同的筛选目标用\";\"隔开,默认为空,表示落盘所有数据。
仅当acl_task_time参数值为2时生效。" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Filter to select API performance data to dump. For example, specifying " +"\"matmul\" dumps all API data whose `name` contains \"matmul\". String, " +"case-sensitive; use \";\" to separate multiple targets. Empty means dump " +"all.
Effective only when `acl_task_time` is 2." +msgstr "" +"对性能数据进行过滤,配置该参数可自定义采集配置的API性能数据,例如传入\"matmul\"会落盘所有API数据中name字段包含matmul的性能数据。str类型,区分大小写,多个不同的筛选目标用\";\"隔开,默认为空,表示落盘所有数据。仅当acl_task_time参数值为2时生效。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "kernel_filter" msgstr "kernel_filter" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Filter to select kernel performance data to dump. For example, specifying \"matmul\" dumps all kernel data whose `name` contains \"matmul\". String, case-sensitive; use \";\" to separate multiple targets. Empty means dump all.
Effective only when `acl_task_time` is 2." -msgstr "对性能数据进行过滤,配置该参数可自定义采集配置的kernel性能数据,例如传入\"matmul\"会落盘所有kernel数据中name字段包含matmul的性能数据。str类型,区分大小写,多个不同的筛选目标用\";\"隔开,默认为空,表示落盘所有数据。
仅当acl_task_time参数值为2时生效。" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Filter to select kernel performance data to dump. For example, specifying" +" \"matmul\" dumps all kernel data whose `name` contains \"matmul\". " +"String, case-sensitive; use \";\" to separate multiple targets. Empty " +"means dump all.
Effective only when `acl_task_time` is 2." +msgstr "" +"对性能数据进行过滤,配置该参数可自定义采集配置的kernel性能数据,例如传入\"matmul\"会落盘所有kernel数据中name字段包含matmul的性能数据。str类型,区分大小写,多个不同的筛选目标用\";\"隔开,默认为空,表示落盘所有数据。仅当acl_task_time参数值为2时生效。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "timelimit" msgstr "timelimit" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Profiling duration for the service. The process stops automatically after this time. Range: integer 0–7200, unit: seconds. Default 0 means unlimited." +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Profiling duration for the service. The process stops automatically after" +" this time. Range: integer 0–7200, unit: seconds. Default 0 means " +"unlimited." msgstr "设置服务化性能数据采集的时长,配置该参数后,采集进程将在运行指定的时间后自动停止,取值范围为0~7200的整数,单位s,默认值0(表示不限制采集时间)" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "domain" msgstr "domain" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Limit profiling to the specified domains to reduce data volume. String, separated by semicolons, case-sensitive, e.g., \"Request; KVCache\".
Empty means all available domains.
Available domains: Request, KVCache, ModelExecute, BatchSchedule, Communication.
Note: If the selected domains are incomplete, analysis output may show warnings due to missing data. See [Reference Table 1](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/devaids/Profiling/mindieprofiling_0009.html#ZH-CN_TOPIC_0000002370256365__table1985410131831)." -msgstr "设置采集指定domain域下的性能数据,减少采集数据量。输入参数为字符串格式,英文分号作为分隔符,区分大小写,例如:\"Request; KVCache\"。
默认为空,表示采集当前所有domain域内性能数据。
当前已有domain域为:Request、KVCache、ModelExecute、BatchSchedule、Communication。
说明:
若指定domain域不全,采集数据不满足解析输出件生成时,会有告警提示。[查看表1](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/devaids/Profiling/mindieprofiling_0009.html#ZH-CN_TOPIC_0000002370256365__table1985410131831)" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "" +"Limit profiling to the specified domains to reduce data volume. String, " +"separated by semicolons, case-sensitive, e.g., \"Request; KVCache\".
Empty means all available domains.
Available domains: Request, " +"KVCache, ModelExecute, BatchSchedule, Communication.
Note: If the " +"selected domains are incomplete, analysis output may show warnings due to" +" missing data. See [Reference Table " +"1](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/devaids/Profiling/mindieprofiling_0009.html" +"#ZH-CN_TOPIC_0000002370256365__table1985410131831)." +msgstr "" +"设置采集指定domain域下的性能数据,减少采集数据量。输入参数为字符串格式,英文分号作为分隔符,区分大小写,例如:\"Request; " +"KVCache\"。
默认为空,表示采集当前所有domain域内性能数据。
当前已有domain域为:Request、KVCache、ModelExecute、BatchSchedule、Communication。
说明:
若指定domain域不全,采集数据不满足解析输出件生成时,会有告警提示。[查看表1](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/devaids/Profiling/mindieprofiling_0009.html" +"#ZH-CN_TOPIC_0000002370256365__table1985410131831)" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:220 msgid "Example Configuration" msgstr "配置示例" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "2 Symbols Configuration File" -msgstr "2 点位配置文件说明" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:237 +msgid "6.2 Symbols Configuration File" +msgstr "6.2 点位配置文件说明" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "The symbols configuration file defines which functions/methods to profile and supports flexible configuration with custom attribute collection." +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:239 +msgid "" +"The symbols configuration file defines which functions/methods to profile" +" and supports flexible configuration with custom attribute collection." msgstr "点位配置文件用于定义需要采集的函数/方法,支持灵活配置与自定义属性采集。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "2.1 File Name and Loading" -msgstr "2.1 文件命名与加载" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:241 +msgid "File Name and Loading" +msgstr "6.2.1 文件命名与加载" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Default load path:`~/.config/vllm_ascend/service_profiling_symbols.MAJOR.MINOR.PATCH.yaml`( According to the installed version of vllm )" -msgstr "默认加载路径:`~/.config/vllm_ascend/service_profiling_symbols.MAJOR.MINOR.PATCH.yaml`(随已安装的 vllm 版本变化)" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:243 +msgid "" +"Default load " +"path:`~/.config/vllm_ascend/service_profiling_symbols.MAJOR.MINOR.PATCH.yaml`(According" +" to the installed version of vllm )" +msgstr "" +"默认加载路径:`~/.config/vllm_ascend/service_profiling_symbols.MAJOR.MINOR.PATCH.yaml`(随已安装的" +" vllm 版本变化)" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "If you need to customize the profiling points, it is highly recommended to copy a profiling configuration file to your working directory using the `PROFILING_SYMBOLS_PATH` environment variable." +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:245 +msgid "" +"If you need to customize the profiling points, it is highly recommended " +"to copy a profiling configuration file to your working directory using " +"the `PROFILING_SYMBOLS_PATH` environment variable." 
msgstr "如需自定义采集点,推荐通过设置环境变量`PROFILING_SYMBOLS_PATH`,将一份点位配置文件复制到工作目录进行修改使用。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "2.2 Field Descriptions" -msgstr "2.2 配置字段说明" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:247 +msgid "Field Descriptions" +msgstr "6.2.2 配置字段说明" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Field" msgstr "字段" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Example" msgstr "示例" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "symbol" msgstr "symbol" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Python import path + attribute chain" msgstr "Python 导入路径 + 属性链" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "`\"vllm.v1.core.kv_cache_manager:KVCacheManager.free\"`" msgstr "`\"vllm.v1.core.kv_cache_manager:KVCacheManager.free\"`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "handler" msgstr "handler" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Handler type" msgstr "处理函数类型" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "`\"timer\"` (default) or `\"pkg.mod:func\"` (custom)" msgstr "`\"timer\"`(默认)或 `\"pkg.mod:func\"`(自定义)" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "domain" -msgstr "domain" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Domain tag" msgstr "埋点域标识" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "`\"KVCache\"`, `\"ModelExecute\"`" msgstr "`\"KVCache\"`, `\"ModelExecute\"`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "name" msgstr "name" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Event name" -msgstr "埋点名称" +msgstr "事件名称" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "`\"EngineCoreExecute\"`" msgstr "`\"EngineCoreExecute\"`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "min_version" msgstr "min_version" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "max_version" -msgstr "max_version" - -#: 
../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Upper version constraint" -msgstr "最高版本约束" +msgstr "版本上限约束" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Lower version constraint" -msgstr "最低版本约束" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "`\"0.9.1\"`" msgstr "`\"0.9.1\"`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "max_version" +msgstr "max_version" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Lower version constraint" +msgstr "版本下限约束" + +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "`\"0.11.0\"`" msgstr "`\"0.11.0\"`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "attributes" msgstr "attributes" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md msgid "Custom attribute collection" msgstr "自定义属性采集" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Only support for `"timer"` handler. See the section below" -msgstr "只支持 `"timer"` handler。详见下方自定义属性采集机制" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md +msgid "Only supported for `\"timer\"` handler. See the section below" +msgstr "仅支持 `\"timer\"` 处理程序。请参阅下方章节" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "2.3 Examples" -msgstr "2.3 配置示例" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:259 +msgid "Examples" +msgstr "示例" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:261 msgid "Example 1: Custom handler" -msgstr "示例 1:自定义处理函数" +msgstr "示例 1:自定义处理程序" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:270 msgid "Example 2: Default timer" msgstr "示例 2:默认计时器" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:278 msgid "Example 3: Version constraint" msgstr "示例 3:版本约束" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "No handler specified -> default timer" -msgstr "未指定 handler -> 默认 timer" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:286 +msgid "Custom Attribute Collection" +msgstr "自定义属性采集" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "2.4 Custom Attribute Collection" -msgstr "2.4 自定义属性采集机制" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:288 +msgid "" +"The `attributes` field supports flexible custom attribute collection and " +"allows operations and transformations on function arguments and return " +"values." 
+msgstr "`attributes` 字段支持灵活的自定义属性采集,并允许对函数参数和返回值进行操作与转换。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "The `attributes` field supports flexible custom attribute collection and allows operations and transformations on function arguments and return values." -msgstr "`attributes` 字段支持灵活的自定义属性采集,可对函数参数与返回值进行多种操作与转换。" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:290 msgid "Basic Syntax" msgstr "基本语法" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:292 msgid "Argument access: use the parameter name directly, e.g., `input_ids`" -msgstr "参数访问:直接使用参数名,如 `input_ids`" +msgstr "参数访问:直接使用参数名,例如 `input_ids`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:293 msgid "Return value access: use the `return` keyword" msgstr "返回值访问:使用 `return` 关键字" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:294 msgid "Pipeline operations: use `|` to chain multiple operations" -msgstr "管道操作:使用 `|` 分隔多个操作" +msgstr "管道操作:使用 `|` 连接多个操作" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:295 msgid "Attribute access: use `attr` to access object attributes" -msgstr "属性访问:使用 `attr` 获取对象属性" +msgstr "属性访问:使用 `attr` 访问对象属性" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Example" -msgstr "配置示例" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:312 msgid "Expression Notes" msgstr "表达式说明" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:314 msgid "`len(input_ids)`: get the length of parameter `input_ids`." -msgstr "`len(input_ids)`:获取 `input_ids` 参数的长度。" +msgstr "`len(input_ids)`:获取参数 `input_ids` 的长度。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:315 msgid "" "`len(return) | str`: get the length of the return value and convert to " "string (equivalent to `str(len(return))`)." -msgstr "`len(return) | str`:获取返回值长度并转换为字符串(等价于 `str(len(return))`)。" +msgstr "`len(return) | str`:获取返回值的长度并转换为字符串(等价于 `str(len(return))`)。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:316 msgid "" "`return[0] | attr input_ids | len`: get the length of the `input_ids` " "attribute of the first element in the return value." 
-msgstr "`return[0] | attr input_ids | len`:获取返回值第一个元素的 `input_ids` 属性长度。" +msgstr "`return[0] | attr input_ids | len`:获取返回值中第一个元素的 `input_ids` 属性长度。" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:318 msgid "Supported Expression Types" msgstr "支持的表达式类型" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:320 msgid "Basic operations: `len()`, `str()`, `int()`, `float()`" msgstr "基础操作:`len()`, `str()`, `int()`, `float()`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:321 msgid "Index access: `return[0]`, `return['key']`" msgstr "索引访问:`return[0]`, `return['key']`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:322 msgid "Attribute access: `return | attr attr_name`" msgstr "属性访问:`return | attr attr_name`" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:323 msgid "Pipeline composition: chain operations with `|`" -msgstr "管道组合:多个操作通过 `|` 连接" +msgstr "管道组合:使用 `|` 链接操作" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:325 msgid "Advanced Examples" msgstr "高级示例" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Get tensor shape" -msgstr "获取张量形状" +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:346 +msgid "Custom Handler" +msgstr "自定义处理程序" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Get specific value from a dict" -msgstr "获取字典中的特定值" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Conditional expression (requires custom handler support)" -msgstr "条件表达式(需要自定义处理函数支持)" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Complex data processing" -msgstr "复杂的数据处理" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "2.5 Custom Handler" -msgstr "2.5 自定义处理函数" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:348 msgid "" "When `handler` specifies a custom function, it must match the following " "signature:" -msgstr "当 `handler` 字段指定自定义处理函数时,该函数需满足以下签名:" +msgstr "当 `handler` 指定自定义函数时,该函数必须符合以下签名:" -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Custom handler" -msgstr "自定义处理函数" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "original_func: the original function object" -msgstr "original_func: 原始函数对象" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "this: the bound object (for methods)" -msgstr "this: 调用对象(对于方法调用)" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "*args: positional arguments" -msgstr "*args: 位置参数" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "**kwargs: keyword arguments" -msgstr "**kwargs: 关键字参数" - -#: 
../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "processing result" -msgstr "处理结果" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md -msgid "Custom logic" -msgstr "自定义处理逻辑" - -#: ../../developer_guide/performance_and_debug/service_profiling_guide.md +#: ../../source/developer_guide/performance_and_debug/service_profiling_guide.md:368 msgid "" -"If the custom handler fails to import, the system will automatically fall " -"back to the default timer mode." -msgstr "若自定义处理函数导入失败,系统会自动回退至默认计时器模式。" +"If the custom handler fails to import, the system will automatically fall" +" back to the default timer mode." +msgstr "如果自定义处理程序导入失败,系统将自动回退到默认计时器模式。" + +#~ msgid "Quick Start" +#~ msgstr "快速开始" + +#~ msgid "cd ${path_to_store_profiling_files}" +#~ msgstr "cd ${profiling 文件存放路径}" + +#~ msgid "Set environment variable" +#~ msgstr "设置环境变量" + +#~ msgid "Start vLLM service" +#~ msgstr "启动 vLLM 服务" + +#~ msgid "" +#~ "xxxx-xxxx is the directory automatically" +#~ " created based on vLLM startup time" +#~ msgstr "xxxx-xxxx 是根据 vLLM 启动时间自动创建的目录" + +#~ msgid "Analyze data" +#~ msgstr "分析数据" + +#~ msgid "Appendix" +#~ msgstr "附录" + +#~ msgid "2.3 Examples" +#~ msgstr "2.3 示例" + +#~ msgid "No handler specified -> default timer" +#~ msgstr "未指定处理程序 -> 默认计时器" + +#~ msgid "Get tensor shape" +#~ msgstr "获取张量形状" + +#~ msgid "Get specific value from a dict" +#~ msgstr "从字典中获取特定值" + +#~ msgid "Conditional expression (requires custom handler support)" +#~ msgstr "条件表达式(需要自定义处理程序支持)" + +#~ msgid "Complex data processing" +#~ msgstr "复杂数据处理" + +#~ msgid "2.5 Custom Handler" +#~ msgstr "2.5 自定义处理程序" + +#~ msgid "original_func: the original function object" +#~ msgstr "original_func: 原始函数对象" + +#~ msgid "this: the bound object (for methods)" +#~ msgstr "this: 绑定对象(对于方法)" + +#~ msgid "*args: positional arguments" +#~ msgstr "*args: 位置参数" + +#~ msgid "**kwargs: keyword arguments" +#~ msgstr "**kwargs: 关键字参数" + +#~ msgid "processing result" +#~ msgstr "处理结果" + +#~ msgid "Custom logic" +#~ msgstr "自定义逻辑" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po b/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po index 6a8513be..b05c77da 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po @@ -4,140 +4,174 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../faqs.md:1 +#: ../../source/faqs.md:1 msgid "FAQs" -msgstr "" - -#: ../../faqs.md:3 -msgid "Version Specific FAQs" -msgstr "特定版本常见问题" - -#: ../../faqs.md:5 -msgid "" -"[[v0.7.3.post1] FAQ & Feedback](https://github.com/vllm-project/vllm-" -"ascend/issues/1007)" -msgstr "" -"[[v0.7.3.post1] 常见问题与反馈](https://github.com/vllm-project/vllm-" -"ascend/issues/1007)" - -#: ../../faqs.md:6 -msgid "" -"[[v0.9.2rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-" -"ascend/issues/1742)" -msgstr "" -"[[v0.9.2rc1] 常见问题与反馈](https://github.com/vllm-project/vllm-" -"ascend/issues/1742)" - -#: ../../faqs.md:8 -msgid "General FAQs" msgstr "常见问题解答" -#: ../../faqs.md:10 +#: ../../source/faqs.md:3 +msgid "Version Specific FAQs" +msgstr "版本特定常见问题" + +#: ../../source/faqs.md:5 +msgid "" +"[[v0.17.0rc1] FAQ & Feedback](https://github.com/vllm-project/vllm-" +"ascend/issues/7173)" +msgstr "" +"[[v0.17.0rc1] 常见问题与反馈](https://github.com/vllm-project/vllm-" +"ascend/issues/7173)" + +#: ../../source/faqs.md:6 +msgid "" +"[[v0.13.0] FAQ & Feedback](https://github.com/vllm-project/vllm-" +"ascend/issues/6583)" +msgstr "" +"[[v0.13.0] 常见问题与反馈](https://github.com/vllm-project/vllm-" +"ascend/issues/6583)" + +#: ../../source/faqs.md:8 +msgid "General FAQs" +msgstr "通用常见问题" + +#: ../../source/faqs.md:10 msgid "1. What devices are currently supported?" msgstr "1. 目前支持哪些设备?" 
-#: ../../faqs.md:12 +#: ../../source/faqs.md:12 msgid "" -"Currently, **ONLY** Atlas A2 series(Ascend-cann-kernels-910b) and Atlas " -"300I(Ascend-cann-kernels-310p) series are supported:" +"Currently, **ONLY** Atlas A2 series (Ascend-cann-kernels-910b), Atlas A3 " +"series (Atlas-A3-cann-kernels) and Atlas 300I (Ascend-cann-kernels-310p) " +"series are supported:" msgstr "" -"目前,**仅**支持 Atlas A2 系列(Ascend-cann-kernels-910b)和 Atlas 300I(Ascend-cann-" -"kernels-310p)系列:" +"目前,**仅**支持 Atlas A2 系列(Ascend-cann-kernels-910b)、Atlas A3 系列(Atlas-A3-cann-kernels)和 Atlas 300I(Ascend-cann-kernels-310p)系列:" -#: ../../faqs.md:14 +#: ../../source/faqs.md:14 msgid "" "Atlas A2 Training series (Atlas 800T A2, Atlas 900 A2 PoD, Atlas 200T A2 " "Box16, Atlas 300T A2)" msgstr "" -"Atlas A2 训练系列(Atlas 800T A2,Atlas 900 A2 PoD,Atlas 200T A2 Box16,Atlas 300T " -"A2)" +"Atlas A2 训练系列(Atlas 800T A2、Atlas 900 A2 PoD、Atlas 200T A2 Box16、Atlas 300T A2)" -#: ../../faqs.md:15 +#: ../../source/faqs.md:15 msgid "Atlas 800I A2 Inference series (Atlas 800I A2)" msgstr "Atlas 800I A2 推理系列(Atlas 800I A2)" -#: ../../faqs.md:16 -msgid "Atlas 300I Inference series (Atlas 300I Duo)" -msgstr "Atlas 300I 推理系列(Atlas 300I Duo)" - -#: ../../faqs.md:18 -msgid "Below series are NOT supported yet:" -msgstr "以下系列目前尚不受支持:" - -#: ../../faqs.md:19 -msgid "Atlas 200I A2 (Ascend-cann-kernels-310b) unplanned yet" -msgstr "Atlas 200I A2(Ascend-cann-kernels-310b)尚未计划" - -#: ../../faqs.md:20 -msgid "Ascend 910, Ascend 910 Pro B (Ascend-cann-kernels-910) unplanned yet" -msgstr "Ascend 910,Ascend 910 Pro B(Ascend-cann-kernels-910)尚未计划" - -#: ../../faqs.md:22 +#: ../../source/faqs.md:16 msgid "" -"From a technical view, vllm-ascend support would be possible if the torch-" -"npu is supported. Otherwise, we have to implement it by using custom ops. We" -" are also welcome to join us to improve together." +"Atlas A3 Training series (Atlas 800T A3, Atlas 900 A3 SuperPoD, Atlas " +"9000 A3 SuperPoD)" msgstr "" -"从技术角度来看,如果支持 torch-npu,则可以支持 vllm-ascend。否则,我们需要通过自定义算子来实现。我们也欢迎大家一起加入,共同改进。" +"Atlas A3 训练系列(Atlas 800T A3、Atlas 900 A3 SuperPoD、Atlas 9000 A3 SuperPoD)" -#: ../../faqs.md:24 +#: ../../source/faqs.md:17 +msgid "Atlas 800I A3 Inference series (Atlas 800I A3)" +msgstr "Atlas 800I A3 推理系列(Atlas 800I A3)" + +#: ../../source/faqs.md:18 +msgid "[Experimental] Atlas 300I Inference series (Atlas 300I Duo)." +msgstr "[实验性] Atlas 300I 推理系列(Atlas 300I Duo)。" + +#: ../../source/faqs.md:19 +msgid "" +"[Experimental] Currently for 310I Duo the stable version is vllm-ascend " +"v0.10.0rc1." +msgstr "[实验性] 目前对于 310I Duo,稳定版本是 vllm-ascend v0.10.0rc1。" + +#: ../../source/faqs.md:21 +msgid "Below series are NOT supported yet:" +msgstr "以下系列目前尚不支持:" + +#: ../../source/faqs.md:23 +msgid "Atlas 200I A2 (Ascend-cann-kernels-310b) unplanned yet" +msgstr "Atlas 200I A2(Ascend-cann-kernels-310b)尚未计划支持" + +#: ../../source/faqs.md:24 +msgid "Ascend 910, Ascend 910 Pro B (Ascend-cann-kernels-910) unplanned yet" +msgstr "Ascend 910、Ascend 910 Pro B(Ascend-cann-kernels-910)尚未计划支持" + +#: ../../source/faqs.md:26 +msgid "" +"From a technical view, vllm-ascend supports devices if torch-npu is " +"supported. Otherwise, we have to implement it by using custom ops. We " +"also welcome you to join us to improve together." +msgstr "" +"从技术角度看,如果 torch-npu 支持某设备,则 vllm-ascend 也支持该设备。否则,我们需要通过自定义算子来实现。我们也欢迎您加入我们,共同改进。" + +#: ../../source/faqs.md:28 msgid "2. How to get our docker containers?" -msgstr "2. 如何获取我们的 docker 容器?" +msgstr "2. 如何获取我们的 Docker 容器?" 
-#: ../../faqs.md:26 +#: ../../source/faqs.md:30 msgid "" "You can get our containers at `Quay.io`, e.g., [vllm-" "ascend](https://quay.io/repository/ascend/vllm-ascend?tab=tags) and " "[cann](https://quay.io/repository/ascend/cann?tab=tags)." msgstr "" -"你可以在 `Quay.io` 获取我们的容器,例如,[vllm-" +"您可以在 `Quay.io` 获取我们的容器,例如:[vllm-" "ascend](https://quay.io/repository/ascend/vllm-ascend?tab=tags) 和 " "[cann](https://quay.io/repository/ascend/cann?tab=tags)。" -#: ../../faqs.md:28 +#: ../../source/faqs.md:32 msgid "" -"If you are in China, you can use `daocloud` to accelerate your downloading:" -msgstr "如果你在中国,可以使用 `daocloud` 来加速下载:" +"If you are in China, you can use `daocloud` or some other mirror sites to" +" accelerate your downloading:" +msgstr "如果您在中国,可以使用 `daocloud` 或其他镜像站点来加速下载:" -#: ../../faqs.md:36 +#: ../../source/faqs.md:42 +msgid "Load Docker Images for offline environment" +msgstr "为离线环境加载 Docker 镜像" + +#: ../../source/faqs.md:44 +msgid "" +"If you want to use container image for offline environments (no internet " +"connection), you need to download container image in an environment with " +"internet access:" +msgstr "如果您想在离线环境(无互联网连接)中使用容器镜像,您需要在有互联网访问权限的环境中下载容器镜像:" + +#: ../../source/faqs.md:46 +msgid "**Exporting Docker images:**" +msgstr "**导出 Docker 镜像:**" + +#: ../../source/faqs.md:58 +msgid "**Importing Docker images in environment without internet access:**" +msgstr "**在无互联网访问权限的环境中导入 Docker 镜像:**" + +#: ../../source/faqs.md:70 msgid "3. What models does vllm-ascend supports?" msgstr "3. vllm-ascend 支持哪些模型?" -#: ../../faqs.md:38 +#: ../../source/faqs.md:72 msgid "" -"Find more details [here](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/support_matrix/supported_models.html)." +"Find more details " +"[here](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/support_matrix/supported_models.html)." msgstr "" -"在[此处](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/support_matrix/supported_models.html)查看更多详细信息。" +"更多详细信息请参见[此处](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/support_matrix/supported_models.html)。" -#: ../../faqs.md:40 +#: ../../source/faqs.md:74 msgid "4. How to get in touch with our community?" msgstr "4. 如何与我们的社区取得联系?" -#: ../../faqs.md:42 +#: ../../source/faqs.md:76 msgid "" "There are many channels that you can communicate with our community " "developers / users:" -msgstr "你可以通过多种渠道与我们的社区开发者/用户进行交流:" +msgstr "您可以通过多种渠道与我们的社区开发者/用户进行交流:" -#: ../../faqs.md:44 +#: ../../source/faqs.md:78 msgid "" "Submit a GitHub [issue](https://github.com/vllm-project/vllm-" "ascend/issues?page=1)." @@ -145,335 +179,590 @@ msgstr "" "提交一个 GitHub [issue](https://github.com/vllm-project/vllm-" "ascend/issues?page=1)。" -#: ../../faqs.md:45 +#: ../../source/faqs.md:79 msgid "" "Join our [weekly " "meeting](https://docs.google.com/document/d/1hCSzRTMZhIB8vRq1_qOOjx4c9uYUxvdQvDsMV2JcSrw/edit?tab=t.0#heading=h.911qu8j8h35z)" " and share your ideas." -msgstr "" -"加入我们的[每周会议](https://docs.google.com/document/d/1hCSzRTMZhIB8vRq1_qOOjx4c9uYUxvdQvDsMV2JcSrw/edit?tab=t.0#heading=h.911qu8j8h35z),并分享你的想法。" +msgstr "参加我们的[每周例会](https://docs.google.com/document/d/1hCSzRTMZhIB8vRq1_qOOjx4c9uYUxvdQvDsMV2JcSrw/edit?tab=t.0#heading=h.911qu8j8h35z)并分享您的想法。" -#: ../../faqs.md:46 +#: ../../source/faqs.md:80 msgid "" "Join our [WeChat](https://github.com/vllm-project/vllm-" -"ascend/issues/227) group and ask your quenstions." 
-msgstr "" -"加入我们的 [微信群](https://github.com/vllm-project/vllm-ascend/issues/227) " -"并提问你的问题。" +"ascend/issues/227) group and ask your questions." +msgstr "加入我们的[微信群](https://github.com/vllm-project/vllm-ascend/issues/227)并提出您的问题。" -#: ../../faqs.md:47 +#: ../../source/faqs.md:81 msgid "" -"Join our ascend channel in [vLLM " -"forums](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-" -"support/6) and publish your topics." -msgstr "" -"加入我们在 [vLLM 论坛](https://discuss.vllm.ai/c/hardware-support/vllm-" -"ascend-support/6) 的 ascend 频道并发布你的话题。" +"Join our ascend channel in [vLLM forums](https://discuss.vllm.ai/c" +"/hardware-support/vllm-ascend-support/6) and publish your topics." +msgstr "加入我们在 [vLLM 论坛](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support/6) 的 ascend 频道并发布您的主题。" -#: ../../faqs.md:49 +#: ../../source/faqs.md:83 msgid "5. What features does vllm-ascend V1 supports?" msgstr "5. vllm-ascend V1 支持哪些功能?" -#: ../../faqs.md:51 +#: ../../source/faqs.md:85 msgid "" -"Find more details [here](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/support_matrix/supported_features.html)." +"Find more details " +"[here](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/support_matrix/supported_features.html)." msgstr "" -"在[这里](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/support_matrix/supported_features.html)找到更多详细信息。" +"更多详细信息请参见[此处](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/support_matrix/supported_features.html)。" -#: ../../faqs.md:53 +#: ../../source/faqs.md:87 msgid "" "6. How to solve the problem of \"Failed to infer device type\" or " "\"libatb.so: cannot open shared object file\"?" -msgstr "6. 如何解决“无法推断设备类型”或“libatb.so:无法打开共享对象文件”问题?" +msgstr "6. 如何解决“无法推断设备类型”或“libatb.so:无法打开共享对象文件”的问题?" -#: ../../faqs.md:55 +#: ../../source/faqs.md:89 msgid "" "Basically, the reason is that the NPU environment is not configured " "correctly. You can:" -msgstr "基本上,原因是 NPU 环境没有正确配置。你可以:" +msgstr "基本上,原因是 NPU 环境未正确配置。您可以:" -#: ../../faqs.md:56 -msgid "" -"try `source /usr/local/Ascend/nnal/atb/set_env.sh` to enable NNAL package." +#: ../../source/faqs.md:91 +msgid "try `source /usr/local/Ascend/nnal/atb/set_env.sh` to enable NNAL package." msgstr "尝试运行 `source /usr/local/Ascend/nnal/atb/set_env.sh` 以启用 NNAL 包。" -#: ../../faqs.md:57 +#: ../../source/faqs.md:92 msgid "" "try `source /usr/local/Ascend/ascend-toolkit/set_env.sh` to enable CANN " "package." msgstr "尝试运行 `source /usr/local/Ascend/ascend-toolkit/set_env.sh` 以启用 CANN 包。" -#: ../../faqs.md:58 +#: ../../source/faqs.md:93 msgid "try `npu-smi info` to check whether the NPU is working." msgstr "尝试运行 `npu-smi info` 来检查 NPU 是否正常工作。" -#: ../../faqs.md:60 +#: ../../source/faqs.md:95 msgid "" -"If all above steps are not working, you can try the following code with " -"python to check whether there is any error:" -msgstr "如果以上所有步骤都无效,你可以尝试使用以下 python 代码来检查是否有错误:" +"If the above steps are not working, you can try the following code in " +"Python to check whether there are any errors:" +msgstr "如果上述步骤无效,您可以在 Python 中尝试以下代码来检查是否有任何错误:" -#: ../../faqs.md:68 +#: ../../source/faqs.md:103 +msgid "If all above steps are not working, feel free to submit a GitHub issue." +msgstr "如果以上所有步骤都无法解决问题,请随时提交一个 GitHub issue。" + +#: ../../source/faqs.md:105 +msgid "7. How vllm-ascend work with vLLM?" +msgstr "7. vllm-ascend 如何与 vLLM 协同工作?" + +#: ../../source/faqs.md:107 msgid "" -"If all above steps are not working, feel free to submit a GitHub issue." 
-msgstr "如果以上所有步骤都无法解决问题,欢迎提交一个 GitHub issue。" - -#: ../../faqs.md:70 -msgid "7. How does vllm-ascend perform?" -msgstr "7. vllm-ascend 的性能如何?" - -#: ../../faqs.md:72 -msgid "" -"Currently, only some models are improved. Such as `Qwen2.5 VL`, `Qwen3`, " -"`Deepseek V3`. Others are not good enough. From 0.9.0rc2, Qwen and Deepseek" -" works with graph mode to play a good performance. What's more, you can " -"install `mindie-turbo` with `vllm-ascend v0.7.3` to speed up the inference " -"as well." +"`vllm-ascend` is a hardware plugin for vLLM. The version of `vllm-ascend`" +" is the same as the version of `vllm`. For example, if you use `vllm` " +"0.9.1, you should use vllm-ascend 0.9.1 as well. For the main branch, we " +"ensure that `vllm-ascend` and `vllm` are compatible at every commit." msgstr "" -"目前,只有部分模型得到了改进,比如 `Qwen2.5 VL`、`Qwen3` 和 `Deepseek V3`。其他模型的效果还不够理想。从 " -"0.9.0rc2 开始,Qwen 和 Deepseek 已经支持图模式,以获得更好的性能。此外,你还可以在 `vllm-ascend v0.7.3` " -"上安装 `mindie-turbo`,进一步加速推理。" +"`vllm-ascend` 是 vLLM 的一个硬件插件。`vllm-ascend` 的版本与 `vllm` 的版本相同。例如,如果您使用 `vllm` 0.9.1,您也应该使用 vllm-ascend 0.9.1。对于主分支,我们确保 `vllm-ascend` 和 `vllm` 在每次提交时都是兼容的。" -#: ../../faqs.md:74 -msgid "8. How vllm-ascend work with vllm?" -msgstr "8. vllm-ascend 如何与 vllm 协同工作?" +#: ../../source/faqs.md:109 +msgid "8. Does vllm-ascend support Prefill Disaggregation feature?" +msgstr "8. vllm-ascend 是否支持 Prefill Disaggregation 功能?" -#: ../../faqs.md:75 +#: ../../source/faqs.md:111 msgid "" -"vllm-ascend is a plugin for vllm. Basically, the version of vllm-ascend is " -"the same as the version of vllm. For example, if you use vllm 0.7.3, you " -"should use vllm-ascend 0.7.3 as well. For main branch, we will make sure " -"`vllm-ascend` and `vllm` are compatible by each commit." +"Yes, vllm-ascend supports Prefill Disaggregation feature with Mooncake " +"backend. See the [official " +"tutorial](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" +" for example." msgstr "" -"vllm-ascend 是 vllm 的一个插件。基本上,vllm-ascend 的版本与 vllm 的版本是相同的。例如,如果你使用 vllm " -"0.7.3,你也应该使用 vllm-ascend 0.7.3。对于主分支,我们会确保每次提交都让 `vllm-ascend` 和 `vllm` " -"保持兼容。" +"是的,vllm-ascend 支持通过 Mooncake 后端实现 Prefill Disaggregation 功能。示例请参见[官方教程](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)。" -#: ../../faqs.md:77 -msgid "9. Does vllm-ascend support Prefill Disaggregation feature?" -msgstr "9. vllm-ascend 支持 Prefill Disaggregation 功能吗?" +#: ../../source/faqs.md:113 +msgid "9. Does vllm-ascend support quantization method?" +msgstr "9. vllm-ascend 是否支持量化方法?" -#: ../../faqs.md:79 +#: ../../source/faqs.md:115 msgid "" -"Currently, only 1P1D is supported on V0 Engine. For V1 Engine or NPND " -"support, We will make it stable and supported by vllm-ascend in the future." -msgstr "目前,V0引擎只支持1P1D。对于V1引擎或NPND的支持,我们将在未来使其稳定并由vllm-ascend支持。" +"Currently, w8a8, w4a8, and w4a4 quantization methods are already " +"supported by vllm-ascend." +msgstr "目前,vllm-ascend 已支持 w8a8、w4a8 和 w4a4 量化方法。" -#: ../../faqs.md:81 -msgid "10. Does vllm-ascend support quantization method?" -msgstr "10. vllm-ascend 支持量化方法吗?" +#: ../../source/faqs.md:117 +msgid "10. How is vllm-ascend tested?" +msgstr "10. vllm-ascend 是如何测试的?" 
-#: ../../faqs.md:83 +#: ../../source/faqs.md:119 msgid "" -"Currently, w8a8 quantization is already supported by vllm-ascend originally " -"on v0.8.4rc2 or higher, If you're using vllm 0.7.3 version, w8a8 " -"quantization is supporeted with the integration of vllm-ascend and mindie-" -"turbo, please use `pip install vllm-ascend[mindie-turbo]`." +"vllm-ascend is tested in three aspects: functions, performance, and " +"accuracy." +msgstr "vllm-ascend 在三个方面进行测试:功能、性能和精度。" + +#: ../../source/faqs.md:121 +msgid "" +"**Functional test**: We added CI, including part of vllm's native unit " +"tests and vllm-ascend's own unit tests. In vllm-ascend's tests, we test " +"basic functionalities, popular model availability, and [supported " +"features](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/support_matrix/supported_features.html)" +" through E2E test." msgstr "" -"目前,w8a8 量化已在 v0.8.4rc2 或更高版本的 vllm-ascend 中原生支持。如果你使用的是 vllm 0.7.3 版本,集成了 " -"vllm-ascend 和 mindie-turbo 后也支持 w8a8 量化,请使用 `pip install vllm-ascend[mindie-" -"turbo]`。" +"**功能测试**:我们添加了 CI,包括部分 vllm 的原生单元测试和 vllm-ascend 自身的单元测试。在 vllm-ascend 的测试中,我们通过端到端测试来验证基本功能、主流模型的可用性以及[支持的功能](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/support_matrix/supported_features.html)。" -#: ../../faqs.md:85 -msgid "11. How to run w8a8 DeepSeek model?" -msgstr "11. 如何运行 w8a8 DeepSeek 模型?" - -#: ../../faqs.md:87 +#: ../../source/faqs.md:123 msgid "" -"Please following the [inferencing tutorial](https://vllm-" -"ascend.readthedocs.io/en/latest/tutorials/multi_node.html) and replace model" -" to DeepSeek." +"**Performance test**: We provide [benchmark](https://github.com/vllm-" +"project/vllm-ascend/tree/main/benchmarks) tools for E2E performance " +"benchmark, which can be easily re-run locally. We will publish a perf " +"website to show the performance test results for each pull request." msgstr "" -"请按照[inferencing 教程](https://vllm-" -"ascend.readthedocs.io/en/latest/tutorials/multi_node.html)进行操作,并将模型更换为 " -"DeepSeek。" +"**性能测试**:我们提供了用于端到端性能基准测试的[基准测试](https://github.com/vllm-project/vllm-ascend/tree/main/benchmarks)工具,可以方便地在本地重新运行。我们将发布一个性能网站,展示每个拉取请求的性能测试结果。" -#: ../../faqs.md:89 +#: ../../source/faqs.md:125 msgid "" -"12. There is no output in log when loading models using vllm-ascend, How to " -"solve it?" -msgstr "12. 使用 vllm-ascend 加载模型时日志没有输出,如何解决?" +"**Accuracy test**: We are working on adding accuracy test to the CI as " +"well." +msgstr "**准确性测试**:我们正在努力将准确性测试也添加到CI中。" -#: ../../faqs.md:91 +#: ../../source/faqs.md:127 msgid "" -"If you're using vllm 0.7.3 version, this is a known progress bar display " -"issue in VLLM, which has been resolved in [this PR](https://github.com/vllm-" -"project/vllm/pull/12428), please cherry-pick it locally by yourself. " -"Otherwise, please fill up an issue." +"**Nightly test**: we'll run full test every night to make sure the code " +"is working." +msgstr "**夜间测试**:我们将每晚运行完整测试,以确保代码正常工作。" + +#: ../../source/faqs.md:129 +msgid "" +"For each release, we'll publish the performance test and accuracy test " +"report in the future." +msgstr "对于每个版本,我们未来都将发布性能测试和准确性测试报告。" + +#: ../../source/faqs.md:131 +msgid "11. How to fix the error \"InvalidVersion\" when using vllm-ascend?" +msgstr "11. 使用 vllm-ascend 时如何修复 \"InvalidVersion\" 错误?" + +#: ../../source/faqs.md:133 +msgid "" +"The problem is usually caused by the installation of a development or " +"editable version of the vLLM package. 
In this case, we provide the " +"environment variable `VLLM_VERSION` to let users specify the version of " +"vLLM package to use. Please set the environment variable `VLLM_VERSION` " +"to the version of the vLLM package you have installed. The format of " +"`VLLM_VERSION` should be `X.Y.Z`." msgstr "" -"如果你正在使用 vllm 0.7.3 版本,这是 VLLM 已知的进度条显示问题,已在 [此 PR](https://github.com/vllm-" -"project/vllm/pull/12428) 中解决,请自行在本地进行 cherry-pick。否则,请提交一个 issue。" - -#: ../../faqs.md:93 -msgid "13. How vllm-ascend is tested" -msgstr "13. 如何测试 vllm-ascend" - -#: ../../faqs.md:95 -msgid "" -"vllm-ascend is tested by functional test, performance test and accuracy " -"test." -msgstr "vllm-ascend 经过功能测试、性能测试和精度测试。" - -#: ../../faqs.md:97 -msgid "" -"**Functional test**: we added CI, includes portion of vllm's native unit " -"tests and vllm-ascend's own unit tests,on vllm-ascend's test, we test basic " -"functionality、popular models availability and [supported " -"features](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/support_matrix/supported_features.html)" -" via e2e test" -msgstr "" -"**功能测试**:我们添加了CI,包含了vllm原生单元测试的一部分以及vllm-ascend自己的单元测试。在vllm-" -"ascend的测试中,我们通过e2e测试验证了基本功能、主流模型可用性和[支持的特性](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/support_matrix/supported_features.html)。" - -#: ../../faqs.md:99 -msgid "" -"**Performance test**: we provide [benchmark](https://github.com/vllm-" -"project/vllm-ascend/tree/main/benchmarks) tools for end-to-end performance " -"benchmark which can easily to re-route locally, we'll publish a perf website" -" to show the performance test results for each pull request" -msgstr "" -"**性能测试**:我们提供了用于端到端性能基准测试的[基准测试](https://github.com/vllm-project/vllm-" -"ascend/tree/main/benchmarks)工具,可以方便地在本地重新运行。我们将发布一个性能网站,用于展示每个拉取请求的性能测试结果。" - -#: ../../faqs.md:101 -msgid "" -"**Accuracy test**: we're working on adding accuracy test to CI as well." -msgstr "**准确性测试**:我们也在努力将准确性测试添加到CI中。" - -#: ../../faqs.md:103 -msgid "" -"Finnall, for each release, we'll publish the performance test and accuracy " -"test report in the future." -msgstr "最后,未来每个版本发布时,我们都会公开性能测试和准确性测试报告。" - -#: ../../faqs.md:105 -msgid "14. How to fix the error \"InvalidVersion\" when using vllm-ascend?" -msgstr "14. 使用 vllm-ascend 时如何解决 “InvalidVersion” 错误?" - -#: ../../faqs.md:106 -msgid "" -"It's usually because you have installed an dev/editable version of vLLM " -"package. In this case, we provide the env variable `VLLM_VERSION` to let " -"users specify the version of vLLM package to use. Please set the env " -"variable `VLLM_VERSION` to the version of vLLM package you have installed. " -"The format of `VLLM_VERSION` should be `X.Y.Z`." -msgstr "" -"这通常是因为你安装了开发版或可编辑版本的 vLLM 包。在这种情况下,我们提供了环境变量 `VLLM_VERSION`,以便用户指定要使用的 vLLM " +"此问题通常是由于安装了开发版或可编辑版本的 vLLM 包引起的。为此,我们提供了环境变量 `VLLM_VERSION`,允许用户指定要使用的 vLLM " "包版本。请将环境变量 `VLLM_VERSION` 设置为你已安装的 vLLM 包的版本。`VLLM_VERSION` 的格式应为 `X.Y.Z`。" -#: ../../faqs.md:108 -msgid "15. How to handle Out Of Memory?" -msgstr "15. 如何处理内存溢出?" +#: ../../source/faqs.md:135 +msgid "12. How to handle the out-of-memory issue?" +msgstr "12. 如何处理内存不足问题?" -#: ../../faqs.md:109 +#: ../../source/faqs.md:137 msgid "" -"OOM errors typically occur when the model exceeds the memory capacity of a " -"single NPU. For general guidance, you can refer to [vLLM's OOM " +"OOM errors typically occur when the model exceeds the memory capacity of " +"a single NPU. 
For general guidance, you can refer to [vLLM OOM " "troubleshooting " -"documentation](https://docs.vllm.ai/en/latest/getting_started/troubleshooting.html#out-" +"documentation](https://docs.vllm.ai/en/latest/usage/troubleshooting/#out-" "of-memory)." msgstr "" -"当模型超出单个 NPU 的内存容量时,通常会发生 OOM(内存溢出)错误。一般性的指导可以参考 [vLLM 的 OOM " -"故障排除文档](https://docs.vllm.ai/en/latest/getting_started/troubleshooting.html#out-" -"of-memory)。" +"当模型超出单个 NPU 的内存容量时,通常会发生 OOM(内存不足)错误。一般性指导可参考 [vLLM OOM 故障排除文档](https://docs.vllm.ai/en/latest/usage/troubleshooting/#out-of-memory)。" -#: ../../faqs.md:111 +#: ../../source/faqs.md:139 msgid "" -"In scenarios where NPUs have limited HBM (High Bandwidth Memory) capacity, " -"dynamic memory allocation/deallocation during inference can exacerbate " -"memory fragmentation, leading to OOM. To address this:" -msgstr "" -"在 NPU 的 HBM(高带宽内存)容量有限的场景下,推理过程中动态内存分配和释放会加剧内存碎片,从而导致 OOM(内存溢出)。为了解决这个问题:" +"In scenarios where NPUs have limited high bandwidth memory (HBM) " +"capacity, dynamic memory allocation/deallocation during inference can " +"exacerbate memory fragmentation, leading to OOM. To address this:" +msgstr "在 NPU 的高带宽内存容量有限的场景下,推理过程中的动态内存分配/释放会加剧内存碎片,导致 OOM。为解决此问题:" -#: ../../faqs.md:113 +#: ../../source/faqs.md:141 msgid "" -"**Adjust `--gpu-memory-utilization`**: If unspecified, will use the default " -"value of `0.9`. You can decrease this param to reserve more memory to reduce" -" fragmentation risks. See more note in: [vLLM - Inference and Serving - " -"Engine " -"Arguments](https://docs.vllm.ai/en/latest/serving/engine_args.html#vllm.engine.arg_utils-" -"_engine_args_parser-cacheconfig)." -msgstr "" -"**调整 `--gpu-memory-utilization`**:如果未指定,将使用默认值 " -"`0.9`。你可以降低此参数来预留更多内存,从而降低内存碎片风险。参见更多说明:[vLLM - 推理与服务 - " -"引擎参数](https://docs.vllm.ai/en/latest/serving/engine_args.html#vllm.engine.arg_utils-" -"_engine_args_parser-cacheconfig)。" +"**Limit `--max-model-len`**: It can save the HBM usage for KV cache " +"initialization step." +msgstr "**限制 `--max-model-len`**:它可以节省 KV 缓存初始化步骤的 HBM 使用量。" -#: ../../faqs.md:115 +#: ../../source/faqs.md:143 +msgid "" +"**Adjust `--gpu-memory-utilization`**: If unspecified, the default value " +"is `0.9`. You can decrease this value to reserve more memory to reduce " +"fragmentation risks. See details in: [vLLM - Inference and Serving - " +"Engine Arguments](https://docs.vllm.ai/en/latest/cli/serve/#-gpu-memory-" +"utilization)." +msgstr "" +"**调整 `--gpu-memory-utilization`**:如果未指定,默认值为 `0.9`。你可以降低此值以预留更多内存,从而减少碎片风险。详情参见:[vLLM - 推理与服务 - 引擎参数](https://docs.vllm.ai/en/latest/cli/serve/#-gpu-memory-utilization)。" + +#: ../../source/faqs.md:145 msgid "" "**Configure `PYTORCH_NPU_ALLOC_CONF`**: Set this environment variable to " -"optimize NPU memory management. For example, you can `export " -"PYTORCH_NPU_ALLOC_CONF=expandable_segments:True` to enable virtual memory " -"feature to mitigate memory fragmentation caused by frequent dynamic memory " -"size adjustments during runtime, see more note in: " +"optimize NPU memory management. For example, you can use `export " +"PYTORCH_NPU_ALLOC_CONF=expandable_segments:True` to enable virtual memory" +" feature to mitigate memory fragmentation caused by frequent dynamic " +"memory size adjustments during runtime. See details in " "[PYTORCH_NPU_ALLOC_CONF](https://www.hiascend.com/document/detail/zh/Pytorch/700/comref/Envvariables/Envir_012.html)." 
msgstr "" -"**配置 `PYTORCH_NPU_ALLOC_CONF`**:设置此环境变量以优化NPU内存管理。例如,你可以通过 `export " +"**配置 `PYTORCH_NPU_ALLOC_CONF`**:设置此环境变量以优化 NPU 内存管理。例如,你可以使用 `export " "PYTORCH_NPU_ALLOC_CONF=expandable_segments:True` " -"来启用虚拟内存功能,以缓解运行时频繁动态调整内存大小导致的内存碎片问题,更多说明参见:[PYTORCH_NPU_ALLOC_CONF](https://www.hiascend.com/document/detail/zh/Pytorch/700/comref/Envvariables/Envir_012.html)。" +"来启用虚拟内存功能,以缓解运行时频繁动态调整内存大小导致的内存碎片问题。详情参见:[PYTORCH_NPU_ALLOC_CONF](https://www.hiascend.com/document/detail/zh/Pytorch/700/comref/Envvariables/Envir_012.html)。" -#: ../../faqs.md:117 -msgid "16. Failed to enable NPU graph mode when running DeepSeek?" -msgstr "16. 运行 DeepSeek 时无法启用 NPU 图模式?" +#: ../../source/faqs.md:147 +msgid "13. Failed to enable NPU graph mode when running DeepSeek" +msgstr "13. 运行 DeepSeek 时无法启用 NPU 图模式" -#: ../../faqs.md:118 -#, python-brace-format +#: ../../source/faqs.md:149 msgid "" -"You may encounter the following error if running DeepSeek with NPU graph " -"mode enabled. The allowed number of queries per kv when enabling both MLA " -"and Graph mode only support {32, 64, 128}, **Thus this is not supported for " -"DeepSeek-V2-Lite**, as it only has 16 attention heads. The NPU graph mode " -"support on DeepSeek-V2-Lite will be done in the future." +"Enabling NPU graph mode for DeepSeek may trigger an error. This is " +"because when both MLA and NPU graph mode are active, the number of " +"queries per KV head must be 32, 64, or 128. However, DeepSeek-V2-Lite has" +" only 16 attention heads, which results in 16 queries per KV—a value " +"outside the supported range. Support for NPU graph mode on " +"DeepSeek-V2-Lite will be added in a future update." msgstr "" -"如果在启用NPU图模式(Graph " -"mode)运行DeepSeek时,您可能会遇到以下错误。当同时启用MLA和图模式时,每个kv允许的查询数只支持{32, 64, " -"128},**因此这不支持DeepSeek-V2-Lite**,因为它只有16个注意力头。未来会增加对DeepSeek-V2-Lite在NPU图模式下的支持。" +"为 DeepSeek 启用 NPU 图模式可能会触发错误。这是因为当 MLA 和 NPU 图模式同时激活时,每个 KV 头的查询数必须为 32、64 或 " +"128。然而,DeepSeek-V2-Lite 只有 16 个注意力头,导致每个 KV 有 16 个查询,该值超出了支持范围。对 " +"DeepSeek-V2-Lite 的 NPU 图模式支持将在未来的更新中添加。" -#: ../../faqs.md:120 -#, python-brace-format +#: ../../source/faqs.md:151 msgid "" -"And if you're using DeepSeek-V3 or DeepSeek-R1, please make sure after the " -"tensor parallel split, num_heads / num_kv_heads in {32, 64, 128}." +"And if you're using DeepSeek-V3 or DeepSeek-R1, please make sure after " +"the tensor parallel split, `num_heads`/`num_kv_heads` is {32, 64, 128}." msgstr "" -"如果你正在使用 DeepSeek-V3 或 DeepSeek-R1,请确保在张量并行切分后,num_heads / num_kv_heads 的值为 " -"{32, 64, 128} 中的一个。" +"如果你正在使用 DeepSeek-V3 或 DeepSeek-R1,请确保在张量并行切分后,`num_heads`/`num_kv_heads` 的值为 {32, 64, 128} 中的一个。" -#: ../../faqs.md:127 +#: ../../source/faqs.md:158 msgid "" -"17. Failed to reinstall vllm-ascend from source after uninstalling vllm-" -"ascend?" -msgstr "17. 卸载 vllm-ascend 后无法从源码重新安装 vllm-ascend?" +"14. Failed to reinstall vllm-ascend from source after uninstalling vllm-" +"ascend" +msgstr "14. 卸载 vllm-ascend 后无法从源码重新安装 vllm-ascend" -#: ../../faqs.md:128 +#: ../../source/faqs.md:160 msgid "" -"You may encounter the problem of C compilation failure when reinstalling " -"vllm-ascend from source using pip. If the installation fails, it is " -"recommended to use `python setup.py install` to install, or use `python " -"setup.py clean` to clear the cache." +"You may encounter the problem of C/C++ compilation failure when " +"reinstalling vllm-ascend from source using pip. 
If the installation " +"fails, use `python setup.py install` (recommended) to install, or use " +"`python setup.py clean` to clear the cache." msgstr "" -"当你使用 pip 从源码重新安装 vllm-ascend 时,可能会遇到 C 编译失败的问题。如果安装失败,建议使用 `python setup.py " -"install` 进行安装,或者使用 `python setup.py clean` 清除缓存。" +"使用 pip 从源码重新安装 vllm-ascend 时,可能会遇到 C/C++ 编译失败的问题。如果安装失败,请使用 `python setup.py install`(推荐)进行安装,或使用 `python setup.py clean` 清除缓存。" -#: ../../faqs.md:130 -msgid "18. How to generate deterministic results when using vllm-ascend?" -msgstr "18. 使用 vllm-ascend 时如何生成确定性结果?" +#: ../../source/faqs.md:162 +msgid "15. How to generate deterministic results when using vllm-ascend?" +msgstr "15. 使用 vllm-ascend 时如何生成确定性结果?" -#: ../../faqs.md:131 -msgid "There are several factors that affect output certainty:" +#: ../../source/faqs.md:164 +msgid "There are several factors that affect output determinism:" msgstr "有几个因素会影响输出的确定性:" -#: ../../faqs.md:133 +#: ../../source/faqs.md:166 msgid "" -"Sampler Method: using **Greedy sample** by setting `temperature=0` in " +"Sampler method: using **greedy sampling** by setting `temperature=0` in " "`SamplingParams`, e.g.:" msgstr "" -"采样方法:通过在 `SamplingParams` 中设置 `temperature=0` 来使用 **贪婪采样(Greedy " -"sample)**,例如:" +"采样方法:通过在 `SamplingParams` 中设置 `temperature=0` 来使用 **贪婪采样**,例如:" -#: ../../faqs.md:158 +#: ../../source/faqs.md:191 msgid "Set the following environment parameters:" msgstr "设置以下环境参数:" + +#: ../../source/faqs.md:200 +msgid "" +"16. How to fix the error \"ImportError: Please install vllm[audio] for " +"audio support\" for the Qwen2.5-Omni model?" +msgstr "16. 对于 Qwen2.5-Omni 模型,如何修复 \"ImportError: Please install vllm[audio] for audio support\" 错误?" + +#: ../../source/faqs.md:202 +msgid "" +"The `Qwen2.5-Omni` model requires the `librosa` package to be installed, " +"you need to install the `qwen-omni-utils` package to ensure all " +"dependencies are met, run `pip install qwen-omni-utils`. This package " +"will install `librosa` and its related dependencies, resolving the " +"`ImportError: No module named 'librosa'` issue and ensuring that the " +"audio processing functionality works correctly." +msgstr "" +"`Qwen2.5-Omni` 模型需要安装 `librosa` 包,你需要安装 `qwen-omni-utils` 包以确保满足所有依赖,运行 `pip install qwen-omni-utils`。此包将安装 `librosa` 及其相关依赖,解决 `ImportError: No module named 'librosa'` 问题,并确保音频处理功能正常工作。" + +#: ../../source/faqs.md:205 +msgid "" +"17. How to troubleshoot and resolve size capture failures resulting from " +"stream resource exhaustion, and what are the underlying causes?" +msgstr "17. 如何排查和解决因流资源耗尽导致的尺寸捕获失败,其根本原因是什么?" + +#: ../../source/faqs.md:213 +msgid "Recommended mitigation strategies:" +msgstr "推荐的缓解策略:" + +#: ../../source/faqs.md:215 +msgid "" +"Manually configure the compilation_config parameter with a reduced size " +"set: '{\"cudagraph_capture_sizes\":[size1, size2, size3, ...]}'." +msgstr "手动配置 compilation_config 参数,使用缩减后的尺寸集合:'{\"cudagraph_capture_sizes\":[size1, size2, size3, ...]}'。" + +#: ../../source/faqs.md:216 +msgid "" +"Employ ACLgraph's full graph mode as an alternative to the piecewise " +"approach." +msgstr "采用 ACLgraph 的全图模式作为分段方法的替代方案。" + +#: ../../source/faqs.md:218 +msgid "" +"Root cause analysis: The current stream requirement calculation for size " +"captures only accounts for measurable factors including: data parallel " +"size, tensor parallel size, expert parallel configuration, piece graph " +"count, multistream-overlap shared expert settings, and HCCL communication" +" mode (AIV/AICPU). 
However, numerous unquantifiable elements, such as " +"operator characteristics and specific hardware features, consume " +"additional streams outside of this calculation framework, resulting in " +"stream resource exhaustion during size capture operations." +msgstr "" +"根本原因分析:当前尺寸捕获的流需求计算仅考虑了可测量的因素,包括:数据并行大小、张量并行大小、专家并行配置、分段图数量、多流重叠共享专家设置以及 HCCL 通信模式(AIV/AICPU)。然而,许多不可量化的因素,例如算子特性和特定硬件特性,在此计算框架之外消耗了额外的流,导致尺寸捕获操作期间流资源耗尽。" + +#: ../../source/faqs.md:221 +msgid "18. How to install custom version of torch_npu?" +msgstr "18. 如何安装自定义版本的 torch_npu?" + +#: ../../source/faqs.md:223 +msgid "" +"torch-npu will be overridden when installing vllm-ascend. If you need to" +" install a specific version of torch-npu, you can manually install the " +"specified version of torch-npu after vllm-ascend is installed." +msgstr "安装 vllm-ascend 时会覆盖 torch-npu。如果你需要安装特定版本的 torch-npu,可以在 vllm-ascend 安装后手动安装指定版本的 torch-npu。" + +#: ../../source/faqs.md:225 +msgid "" +"19. On certain systems (e.g., Kylin OS), `docker pull` may fail with an " +"`invalid tar header` error" +msgstr "19. 在某些系统上(例如 Kylin OS),`docker pull` 可能因 `invalid tar header` 错误而失败" + +#: ../../source/faqs.md:227 +msgid "" +"On certain operating systems, such as Kylin OS, you may encounter an " +"`invalid tar header` error during the `docker pull` process:" +msgstr "在某些操作系统上,例如 Kylin OS,你可能会在 `docker pull` 过程中遇到 `invalid tar header` 错误:" + +#: ../../source/faqs.md:233 +msgid "" +"This is often due to system compatibility issues. You can resolve this by" +" using an offline loading method with a second machine." +msgstr "这通常是由于系统兼容性问题。你可以使用第二台机器通过离线加载方法来解决此问题。" + +#: ../../source/faqs.md:235 +msgid "" +"On a separate host machine (e.g., a standard Ubuntu server), pull the " +"image for the target ARM64 architecture and package it into a `.tar` " +"file." +msgstr "在一台独立的主机上(例如,标准的 Ubuntu 服务器),拉取目标 ARM64 架构的镜像并将其打包成 `.tar` 文件。" + +#: ../../source/faqs.md:248 +msgid "Transfer the image archive" +msgstr "传输镜像归档文件" + +#: ../../source/faqs.md:250 +msgid "" +"Copy the `vllm_ascend_.tar` file (where `` is the image tag you" +" used) to your target machine" +msgstr "将 `vllm_ascend_.tar` 文件(其中 `` 是你使用的镜像标签)复制到你的目标机器" + +#: ../../source/faqs.md:252 +msgid "" +"20. Why am I getting an error when executing the script to start a Docker" +" container? The error message is: \"operation not permitted\"" +msgstr "20. 为什么执行启动 Docker 容器的脚本时会出错?错误信息是:\"operation not permitted\"" + +#: ../../source/faqs.md:254 +msgid "" +"When using `--shm-size`, you may need to add the `--privileged=true` flag" +" to your `docker run` command to grant the container necessary " +"permissions. Please be aware that using `--privileged=true` grants the " +"container extensive privileges on the host system, which can be a " +"security risk. Only use this option if you understand the implications " +"and trust the container's source." +msgstr "" +"使用 `--shm-size` 时,你可能需要在 `docker run` 命令中添加 `--privileged=true` 标志,以授予容器必要的权限。请注意,使用 `--privileged=true` 会授予容器在主机系统上的广泛权限,这可能带来安全风险。只有在理解其影响并信任容器来源的情况下才使用此选项。" + +#: ../../source/faqs.md:256 +msgid "21. How to achieve low latency in a small batch scenario?" +msgstr "21. 如何在小批量场景下实现低延迟?" + +#: ../../source/faqs.md:258 +msgid "" +"The performance of `torch_npu.npu_fused_infer_attention_score` in small " +"batch scenarios is not satisfactory, mainly due to the lack of flash " +"decoding function. 
We offer an alternative operator in " +"`tools/install_flash_infer_attention_score_ops_a2.sh` and " +"`tools/install_flash_infer_attention_score_ops_a3.sh`, you can install it" +" using the following instruction:" +msgstr "" +"`torch_npu.npu_fused_infer_attention_score` 在小批量场景下的性能不理想,主要是由于缺乏 Flash Decoding 功能。我们在 `tools/install_flash_infer_attention_score_ops_a2.sh` 和 `tools/install_flash_infer_attention_score_ops_a3.sh` 中提供了一个替代算子,你可以使用以下指令安装它:" + +#: ../../source/faqs.md:266 +msgid "" +"**NOTE**: Don't set `additional_config.pa_shape_list` when using this " +"method; otherwise, it will lead to another attention operator. " +"**Important**: Please make sure you're using the **official image** of " +"`vllm-ascend`; otherwise, you **must change** the directory `/vllm-" +"workspace` in `tools/install_flash_infer_attention_score_ops_a2.sh` or " +"`tools/install_flash_infer_attention_score_ops_a3.sh` to your own, or " +"create one. If you're not the root user, you need `sudo` **privileges** " +"to run this script." +msgstr "" +"**注意**:使用此方法时不要设置 `additional_config.pa_shape_list`;否则会导致使用另一个注意力算子。**重要**:请确保你使用的是 `vllm-ascend` 的**官方镜像**;否则,你**必须将** `tools/install_flash_infer_attention_score_ops_a2.sh` 或 `tools/install_flash_infer_attention_score_ops_a3.sh` 中的目录 `/vllm-workspace` **更改为你自己的目录**,或者创建一个。如果你不是 root 用户,则需要 `sudo` **权限**来运行此脚本。" + +#: ../../source/faqs.md:269 +msgid "" +"22. How to set `SOC_VERSION` when building from source on a CPU-only " +"machine?" +msgstr "22. 在仅含 CPU 的机器上从源码构建时,如何设置 `SOC_VERSION`?" + +#: ../../source/faqs.md:271 +msgid "" +"When building from source (e.g. `pip install -e .`), the build may try to" +" infer the target chip via `npu-smi`. If `npu-smi` is not available " +"(common in CPU-only build environments), you must set `SOC_VERSION` " +"manually before installation." +msgstr "" +"从源码构建时(例如 `pip install -e .`),构建过程可能会尝试通过 `npu-smi` 推断目标芯片。如果 `npu-smi` 不可用(在仅含 CPU 的构建环境中很常见),则必须在安装前手动设置 `SOC_VERSION`。" + +#: ../../source/faqs.md:273 +msgid "You can use the defaults from `Dockerfile*` as a reference. For example:" +msgstr "你可以参考 `Dockerfile*` 中的默认值。例如:" + +#: ../../source/faqs.md:289 +msgid "23. Compilation error occasionally encounters with triton-ascend" +msgstr "23. triton-ascend 偶尔遇到编译错误" + +#: ../../source/faqs.md:291 +msgid "" +"As shown in [#7782](https://github.com/vllm-project/vllm-" +"ascend/issues/7782), triton-ascend occasionally encounters compilation " +"errors, which is a known issue in triton-ascend 3.2.0. To avoid this " +"issue, please use the official docker images or install the specific " +"triton-ascend version as following:" +msgstr "" +"如 [#7782](https://github.com/vllm-project/vllm-ascend/issues/7782) 所示,triton-ascend 偶尔会遇到编译错误,这是 triton-ascend 3.2.0 中的一个已知问题。为避免此问题,请使用官方 docker 镜像或按以下方式安装特定的 triton-ascend 版本:" + +#: ../../source/faqs.md:300 +msgid "24. Why TPOT increases drastically as concurrency grows?" +msgstr "24. 为什么 TPOT 随着并发增长而急剧增加?" + +#: ../../source/faqs.md:302 +msgid "" +"When testing a vLLM server, one may find that TPOT increases as " +"concurrency increases (for example, TPOT increases by 0.5 ~ 1ms when " +"concurrency increases by 4). This phenomenon is normal in most cases. " +"However, sometimes TPOT may increase dramatically (10 to 100ms for " +"example) as concurrency grows. This is possibly caused by " +"[**PREEMPTION**](https://docs.vllm.ai/en/latest/configuration/optimization/#preemption)" +" in vLLM. 
Generally, when your server hits KV cache limits, vLLM tries to" +" free KV cache of requests to ensure sufficient space for other requests," +" which is called preemption in vLLM. When a request is preempted, the " +"default behavior is to recompute the KV cache of this request again in " +"the future, which is why the performance might drop significantly. There " +"are several ways to verify this:" +msgstr "" +"在测试 vLLM 服务器时,可能会发现 TPOT 随着并发度的增加而增加(例如,并发度增加 4 时,TPOT 增加 0.5 ~ 1ms)。在大多数情况下,这种现象是正常的。然而,有时随着并发度的增长,TPOT 可能会急剧增加(例如增加 10 到 100ms)。这可能是由 vLLM 中的 [**抢占**](https://docs.vllm.ai/en/latest/configuration/optimization/#preemption) 引起的。通常,当服务器达到 KV 缓存限制时,vLLM 会尝试释放请求的 KV 缓存,以确保为其他请求提供足够的空间,这在 vLLM 中称为抢占。当一个请求被抢占时,默认行为是在未来重新计算该请求的 KV 缓存,这就是性能可能显著下降的原因。有几种方法可以验证这一点:" + +#: ../../source/faqs.md:305 +msgid "" +"vLLM usually logs stats on your server. You might see metrics like `GPU " +"KV cache usage: 99.0%,`. When reaching 100%, it triggers preemption." +msgstr "" +"vLLM 通常会在服务器上记录统计信息。您可能会看到类似 `GPU KV cache usage: 99.0%,` 的指标。当达到 100% 时,会触发抢占。" + +#: ../../source/faqs.md:306 +msgid "" +"When launching a vLLM server, you will see logs like `GPU KV cache size: " +"66340 tokens` and `Maximum concurrency for 16,384 tokens per request: " +"4.05`. These are estimated KV cache capacity for a single DP group. You " +"can adjust the overall request traffic according to this." +msgstr "" +"启动 vLLM 服务器时,您会看到类似 `GPU KV cache size: 66340 tokens` 和 `Maximum concurrency for 16,384 tokens per request: 4.05` 的日志。这些是针对单个 DP 组的估计 KV 缓存容量。您可以据此调整总体请求流量。" + +#: ../../source/faqs.md:308 +msgid "" +"Preemption cannot be avoided completely since KV cache usage always has a" +" limit. But there are methods to reduce the chances of preemption. As is " +"suggested in " +"[**PREEMPTION**](https://docs.vllm.ai/en/latest/configuration/optimization/#preemption)," +" the core strategy is to increase available KV cache. For example, one " +"can increase `--gpu-memory-utilization` or decrease `--max-num-seqs` && " +"`--max-num-batched-tokens`." +msgstr "" +"抢占无法完全避免,因为 KV 缓存的使用总是有限制的。但有方法可以减少抢占的发生几率。正如 [**抢占**](https://docs.vllm.ai/en/latest/configuration/optimization/#preemption) 中所建议的,核心策略是增加可用的 KV 缓存。例如,可以增加 `--gpu-memory-utilization` 或减少 `--max-num-seqs` 和 `--max-num-batched-tokens`。" + +#~ msgid "" +#~ "[[v0.7.3.post1] FAQ & Feedback](https://github.com" +#~ "/vllm-project/vllm-ascend/issues/1007)" +#~ msgstr "" +#~ "[[v0.7.3.post1] 常见问题与反馈](https://github.com/vllm-project" +#~ "/vllm-ascend/issues/1007)" + +#~ msgid "7. How does vllm-ascend perform?" +#~ msgstr "7. vllm-ascend 的性能如何?" + +#~ msgid "" +#~ "Currently, only some models are " +#~ "improved. Such as `Qwen2.5 VL`, `Qwen3`," +#~ " `Deepseek V3`. Others are not good" +#~ " enough. From 0.9.0rc2, Qwen and " +#~ "Deepseek works with graph mode to " +#~ "play a good performance. What's more," +#~ " you can install `mindie-turbo` with" +#~ " `vllm-ascend v0.7.3` to speed up " +#~ "the inference as well." +#~ msgstr "" +#~ "目前,只有部分模型得到了改进,例如 `Qwen2.5 VL`、`Qwen3` 和 " +#~ "`Deepseek V3`。其他模型的效果还不够理想。从 0.9.0rc2 版本开始,Qwen " +#~ "和 Deepseek 已支持图模式,以获得更好的性能。此外,您还可以在 `vllm-" +#~ "ascend v0.7.3` 上安装 `mindie-turbo` 来进一步加速推理。" + +#~ msgid "" +#~ "Currently, only 1P1D is supported on " +#~ "V0 Engine. For V1 Engine or NPND" +#~ " support, We will make it stable " +#~ "and supported by vllm-ascend in " +#~ "the future." 
+#~ msgstr "目前,V0 引擎仅支持 1P1D。对于 V1 引擎或 NPND 的支持,我们将在未来使其稳定并由 vllm-ascend 提供支持。" + +#~ msgid "" +#~ "Currently, w8a8 quantization is already " +#~ "supported by vllm-ascend originally on" +#~ " v0.8.4rc2 or higher, If you're using" +#~ " vllm 0.7.3 version, w8a8 quantization " +#~ "is supporeted with the integration of" +#~ " vllm-ascend and mindie-turbo, please" +#~ " use `pip install vllm-ascend[mindie-" +#~ "turbo]`." +#~ msgstr "" +#~ "目前,w8a8 量化已在 v0.8.4rc2 或更高版本的 vllm-ascend 中原生支持。如果您使用的是 vllm 0.7.3 版本,通过集成 vllm-ascend 和 mindie-turbo 也支持 w8a8 量化,请使用 `pip install vllm-ascend[mindie-turbo]`。" + +#~ msgid "11. How to run w8a8 DeepSeek model?" +#~ msgstr "11. 如何运行 w8a8 DeepSeek 模型?" + +#~ msgid "" +#~ "Please following the [inferencing " +#~ "tutorial](https://vllm-" +#~ "ascend.readthedocs.io/en/latest/tutorials/multi_node.html) and" +#~ " replace model to DeepSeek." +#~ msgstr "" +#~ "请按照[推理教程](https://vllm-" +#~ "ascend.readthedocs.io/en/latest/tutorials/multi_node.html)进行操作,并将模型替换为 DeepSeek。" + +#~ msgid "" +#~ "12. There is no output in log " +#~ "when loading models using vllm-ascend," +#~ " How to solve it?" +#~ msgstr "12. 使用 vllm-ascend 加载模型时日志没有输出,如何解决?" + +#~ msgid "" +#~ "If you're using vllm 0.7.3 version, " +#~ "this is a known progress bar " +#~ "display issue in VLLM, which has " +#~ "been resolved in [this PR](https://github.com" +#~ "/vllm-project/vllm/pull/12428), please cherry-" +#~ "pick it locally by yourself. Otherwise," +#~ " please fill up an issue." +#~ msgstr "" +#~ "如果您使用的是 vllm 0.7.3 版本,这是 VLLM 中一个已知的进度条显示问题,已在 [此 PR](https://github.com/vllm-project/vllm/pull/12428) 中解决,请自行在本地进行 cherry-pick。否则,请提交一个 issue。" + +#~ msgid "" +#~ "You may encounter the following error" +#~ " if running DeepSeek with NPU graph" +#~ " mode enabled. The allowed number of" +#~ " queries per kv when enabling both" +#~ " MLA and Graph mode only support " +#~ "{32, 64, 128}, **Thus this is not" +#~ " supported for DeepSeek-V2-Lite**, as it" +#~ " only has 16 attention heads. The " +#~ "NPU graph mode support on " +#~ "DeepSeek-V2-Lite will be done in the " +#~ "future." +#~ msgstr "" +#~ "如果在启用 NPU 图模式的情况下运行 DeepSeek,您可能会遇到以下错误。当同时启用 MLA 和图模式时,每个 kv 允许的查询数仅支持 {32, 64, 128},**因此这不支持 DeepSeek-V2-Lite**,因为它只有 16 个注意力头。未来将增加对 DeepSeek-V2-Lite 的 NPU 图模式支持。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/index.po b/docs/source/locale/zh_CN/LC_MESSAGES/index.po index 26834856..7eb51209 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/index.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/index.po @@ -4,76 +4,71 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: 2025-07-18 10:05+0800\n" "Last-Translator: \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" -"X-Generator: Poedit 3.5\n" +"Generated-By: Babel 2.18.0\n" -#: ../../index.md:33 +#: ../../source/index.md:33 msgid "Getting Started" msgstr "快速开始" -#: ../../index.md:43 +#: ../../source/index.md:45 msgid "User Guide" msgstr "用户指南" -#: ../../index.md:53 +#: ../../source/index.md:56 msgid "Developer Guide" msgstr "开发者指南" -#: ../../index.md:64 +#: ../../source/index.md:66 msgid "Community" msgstr "社区" -#: ../../index.md:1 +#: ../../source/index.md:1 msgid "Welcome to vLLM Ascend Plugin" msgstr "欢迎使用 vLLM Ascend 插件" -#: ../../index.md:3 +#: ../../source/index.md:3 msgid "vLLM" msgstr "vLLM" -#: ../../index.md:24 +#: ../../source/index.md:24 msgid "" -"vLLM Ascend plugin (vllm-ascend) is a community maintained hardware plugin " -"for running vLLM on the Ascend NPU." -msgstr "" -"vLLM Ascend 插件(vllm-ascend)是一个由社区维护的硬件插件,用于在 Ascend " -"NPU 上运行 vLLM。" +"vLLM Ascend plugin (vllm-ascend) is a community-maintained hardware " +"plugin for running vLLM on the Ascend NPU." +msgstr "vLLM Ascend 插件(vllm-ascend)是一个由社区维护的硬件插件,用于在昇腾 NPU 上运行 vLLM。" -#: ../../index.md:26 +#: ../../source/index.md:26 msgid "" -"This plugin is the recommended approach for supporting the Ascend backend " -"within the vLLM community. It adheres to the principles outlined in the " -"[[RFC]: Hardware pluggable](https://github.com/vllm-project/vllm/" -"issues/11162), providing a hardware-pluggable interface that decouples the " -"integration of the Ascend NPU with vLLM." +"This plugin is the recommended approach for supporting the Ascend backend" +" within the vLLM community. It adheres to the principles outlined in the " +"[[RFC]: Hardware pluggable](https://github.com/vllm-" +"project/vllm/issues/11162), providing a hardware-pluggable interface that" +" decouples the integration of the Ascend NPU with vLLM." msgstr "" -"该插件是 vLLM 社区推荐用于支持 Ascend 后端的方法。它遵循 [[RFC]: Hardware " -"pluggable](https://github.com/vllm-project/vllm/issues/11162) 中提出的原" -"则,提供了一个硬件可插拔接口,实现了 Ascend NPU 与 vLLM 集成的解耦。" +"该插件是 vLLM 社区内支持 Ascend 后端的推荐方法。它遵循 [[RFC]: Hardware " +"pluggable](https://github.com/vllm-project/vllm/issues/11162) " +"中概述的原则,提供了一个硬件可插拔接口,将昇腾 NPU 与 vLLM 的集成解耦。" -#: ../../index.md:28 +#: ../../source/index.md:28 msgid "" "By using vLLM Ascend plugin, popular open-source models, including " -"Transformer-like, Mixture-of-Expert, Embedding, Multi-modal LLMs can run " -"seamlessly on the Ascend NPU." +"Transformer-like, Mixture-of-Experts, Embedding, Multi-modal LLMs can run" +" seamlessly on the Ascend NPU." 
msgstr "" -"通过使用 vLLM Ascend 插件,流行的开源模型,包括 Transformer 类、混合专家、" -"嵌入式、多模态大模型等,都可以在 Ascend NPU 上无缝运行。" +"通过使用 vLLM Ascend 插件,包括类 Transformer、混合专家、嵌入和多模态大语言模型在内的流行开源模型,都可以在昇腾 NPU 上无缝运行。" -#: ../../index.md:30 +#: ../../source/index.md:30 msgid "Documentation" -msgstr "文档" +msgstr "文档" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/installation.po b/docs/source/locale/zh_CN/LC_MESSAGES/installation.po index 5ed464b3..be341da7 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/installation.po @@ -4,290 +4,499 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: 2025-07-18 10:09+0800\n" "Last-Translator: \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" -"X-Generator: Poedit 3.5\n" +"Generated-By: Babel 2.18.0\n" -#: ../../installation.md:1 +#: ../../source/installation.md:1 msgid "Installation" msgstr "安装" -#: ../../installation.md:3 +#: ../../source/installation.md:3 msgid "This document describes how to install vllm-ascend manually." msgstr "本文档介绍如何手动安装 vllm-ascend。" -#: ../../installation.md:5 +#: ../../source/installation.md:5 msgid "Requirements" -msgstr "要求" +msgstr "系统要求" -#: ../../installation.md:7 +#: ../../source/installation.md:7 msgid "OS: Linux" msgstr "操作系统:Linux" -#: ../../installation.md:8 -msgid "Python: >= 3.9, < 3.12" -msgstr "Python:>= 3.9,< 3.12" +#: ../../source/installation.md:8 +msgid "Python: >= 3.10, < 3.12" +msgstr "Python:>= 3.10,< 3.12" -#: ../../installation.md:9 -msgid "A hardware with Ascend NPU. It's usually the Atlas 800 A2 series." -msgstr "配备有昇腾NPU的硬件,通常是Atlas 800 A2系列。" +#: ../../source/installation.md:9 +msgid "Hardware with Ascend NPUs. It's usually the Atlas 800 A2 series." 
+msgstr "配备昇腾 NPU 的硬件,通常是 Atlas 800 A2 系列。" -#: ../../installation.md:10 +#: ../../source/installation.md:10 msgid "Software:" msgstr "软件:" -#: ../../installation.md +#: ../../source/installation.md msgid "Software" msgstr "软件" -#: ../../installation.md +#: ../../source/installation.md msgid "Supported version" msgstr "支持的版本" -#: ../../installation.md +#: ../../source/installation.md msgid "Note" -msgstr "注释" +msgstr "备注" -#: ../../installation.md +#: ../../source/installation.md +msgid "Ascend HDK" +msgstr "昇腾 HDK" + +#: ../../source/installation.md +msgid "" +"Refer to the documentation [CANN " +"8.3.RC1](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html)" +msgstr "请参考文档 [CANN 8.3.RC1](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html)" + +#: ../../source/installation.md +msgid "Required for CANN" +msgstr "CANN 所需" + +#: ../../source/installation.md msgid "CANN" msgstr "CANN" -#: ../../installation.md -msgid ">= 8.1.RC1" -msgstr ">= 8.1.RC1" +#: ../../source/installation.md +msgid "== 8.5.1" +msgstr "== 8.5.1" -#: ../../installation.md +#: ../../source/installation.md msgid "Required for vllm-ascend and torch-npu" -msgstr "vllm-ascend 和 torch-npu 必需" +msgstr "vllm-ascend 和 torch-npu 所需" -#: ../../installation.md +#: ../../source/installation.md msgid "torch-npu" msgstr "torch-npu" -#: ../../installation.md -msgid ">= 2.5.1.post1.dev20250619" -msgstr ">= 2.5.1.post1.dev20250619" +#: ../../source/installation.md +msgid "== 2.9.0" +msgstr "== 2.9.0" -#: ../../installation.md +#: ../../source/installation.md msgid "" "Required for vllm-ascend, No need to install manually, it will be auto " "installed in below steps" -msgstr "vllm-ascend 必需,无需手动安装,后续步骤会自动安装。" +msgstr "vllm-ascend 所需,无需手动安装,将在后续步骤中自动安装" -#: ../../installation.md +#: ../../source/installation.md msgid "torch" msgstr "torch" -#: ../../installation.md -msgid ">= 2.5.1" -msgstr ">= 2.5.1" - -#: ../../installation.md +#: ../../source/installation.md msgid "Required for torch-npu and vllm" msgstr "torch-npu 和 vllm 所需" -#: ../../installation.md:18 -msgid "You have 2 way to install:" -msgstr "你有两种安装方式:" +#: ../../source/installation.md +msgid "NNAL" +msgstr "NNAL" -#: ../../installation.md:19 -msgid "" -"**Using pip**: first prepare env manually or via CANN image, then install " -"`vllm-ascend` using pip." -msgstr "" -"**使用 pip**:首先手动准备环境或通过 CANN 镜像准备环境,然后使用 pip 安装 " -"`vllm-ascend`。" +#: ../../source/installation.md +msgid "Required for libatb.so, enables advanced tensor operations" +msgstr "libatb.so 所需,用于启用高级张量运算" -#: ../../installation.md:20 +#: ../../source/installation.md:20 +msgid "There are two installation methods:" +msgstr "有两种安装方法:" + +#: ../../source/installation.md:22 msgid "" -"**Using docker**: use the `vllm-ascend` pre-built docker image directly." +"**Using pip**: first prepare the environment manually or via a CANN " +"image, then install `vllm-ascend` using pip." +msgstr "**使用 pip**:首先手动或通过 CANN 镜像准备环境,然后使用 pip 安装 `vllm-ascend`。" + +#: ../../source/installation.md:23 +msgid "**Using docker**: use the `vllm-ascend` pre-built docker image directly." 
msgstr "**使用 docker**:直接使用 `vllm-ascend` 预构建的 docker 镜像。" -#: ../../installation.md:22 -msgid "Configure a new environment" -msgstr "配置一个新环境" +#: ../../source/installation.md:25 +msgid "Configure Ascend CANN environment" +msgstr "配置昇腾 CANN 环境" -#: ../../installation.md:24 +#: ../../source/installation.md:27 msgid "" -"Before installing, you need to make sure firmware/driver and CANN are " -"installed correctly, refer to [link](https://ascend.github.io/docs/sources/" -"ascend/quick_install.html) for more details." -msgstr "" -"在安装之前,您需要确保固件/驱动和 CANN 已正确安装,更多详情请参考 [链接]" -"(https://ascend.github.io/docs/sources/ascend/quick_install.html)。" +"Before installation, you need to make sure firmware/driver, and CANN are " +"installed correctly, refer to [Ascend Environment Setup " +"Guide](https://ascend.github.io/docs/sources/ascend/quick_install.html) " +"for more details." +msgstr "安装前,您需要确保固件/驱动和 CANN 已正确安装,更多详情请参考 [昇腾环境搭建指南](https://ascend.github.io/docs/sources/ascend/quick_install.html)。" -#: ../../installation.md:26 +#: ../../source/installation.md:29 msgid "Configure hardware environment" msgstr "配置硬件环境" -#: ../../installation.md:28 +#: ../../source/installation.md:31 msgid "" -"To verify that the Ascend NPU firmware and driver were correctly installed, " -"run:" -msgstr "要验证 Ascend NPU 固件和驱动程序是否正确安装,请运行:" +"To verify that the Ascend NPU firmware and driver were correctly " +"installed, run:" +msgstr "要验证昇腾 NPU 固件和驱动程序是否正确安装,请运行:" -#: ../../installation.md:34 +#: ../../source/installation.md:37 msgid "" -"Refer to [Ascend Environment Setup Guide](https://ascend.github.io/docs/" -"sources/ascend/quick_install.html) for more details." -msgstr "" -"更多详情请参考[Ascend环境搭建指南](https://ascend.github.io/docs/sources/" -"ascend/quick_install.html)。" +"Refer to [Ascend Environment Setup " +"Guide](https://ascend.github.io/docs/sources/ascend/quick_install.html) " +"for more details." +msgstr "更多详情请参考 [昇腾环境搭建指南](https://ascend.github.io/docs/sources/ascend/quick_install.html)。" -#: ../../installation.md:36 +#: ../../source/installation.md:39 msgid "Configure software environment" msgstr "配置软件环境" -#: ../../installation.md +#: ../../source/installation.md msgid "Before using pip" -msgstr "在使用 pip 之前" +msgstr "使用 pip 前" -#: ../../installation.md:46 +#: ../../source/installation.md:49 msgid "" "The easiest way to prepare your software environment is using CANN image " "directly:" -msgstr "最简单的方式是直接使用 CANN 镜像来准备您的软件环境:" +msgstr "准备软件环境最简单的方法是直接使用 CANN 镜像:" -#: ../../installation.md +#: ../../source/installation.md:52 +msgid "" +"The CANN prebuilt image includes NNAL (Ascend Neural Network Acceleration" +" Library), which provides libatb.so for advanced tensor operations. No " +"additional installation is required when using the prebuilt image." +msgstr "CANN 预构建镜像包含 NNAL(昇腾神经网络加速库),它提供了用于高级张量运算的 libatb.so。使用预构建镜像时无需额外安装。" + +#: ../../source/installation.md msgid "Click here to see \"Install CANN manually\"" msgstr "点击此处查看“手动安装 CANN”" -#: ../../installation.md:72 +#: ../../source/installation.md:80 msgid "You can also install CANN manually:" -msgstr "你也可以手动安装 CANN:" +msgstr "您也可以手动安装 CANN:" -#: ../../installation.md +#: ../../source/installation.md:83 +msgid "" +"If you encounter \"libatb.so not found\" errors during runtime, please " +"ensure NNAL is properly installed as shown in the manual installation " +"steps below." 
+msgstr "如果在运行时遇到“libatb.so not found”错误,请确保 NNAL 已正确安装,如下方手动安装步骤所示。" + +#: ../../source/installation.md msgid "Before using docker" -msgstr "在使用 docker 之前" +msgstr "使用 docker 前" -#: ../../installation.md:104 +#: ../../source/installation.md:115 msgid "" -"No more extra step if you are using `vllm-ascend` prebuilt docker image." -msgstr "如果你使用 `vllm-ascend` 预构建的 docker 镜像,就无需额外的步骤。" +"No extra steps are needed if you are using the `vllm-ascend` prebuilt " +"Docker image." +msgstr "如果您使用 `vllm-ascend` 预构建的 Docker 镜像,则无需额外步骤。" -#: ../../installation.md:108 -msgid "Once it's done, you can start to set up `vllm` and `vllm-ascend`." -msgstr "完成后,你可以开始配置 `vllm` 和 `vllm-ascend`。" - -#: ../../installation.md:110 -msgid "Setup vllm and vllm-ascend" -msgstr "安装 vllm 和 vllm-ascend" - -#: ../../installation.md -msgid "Using pip" -msgstr "使用 pip" - -#: ../../installation.md:121 -msgid "First install system dependencies and config pip mirror:" -msgstr "首先安装系统依赖并配置 pip 镜像:" - -#: ../../installation.md:133 +#: ../../source/installation.md:119 msgid "" -"**[Optional]** Then config the extra-index of `pip` if you are working on a " -"x86 machine or using torch-npu dev version:" -msgstr "" -"**[可选]** 如果你在 x86 机器上工作或使用 torch-npu 开发版,请配置 `pip` 的额" -"外索引:" +"Once this is done, you can start to set up `vllm` and `vllm-ascend`." +msgstr "完成此步骤后,您就可以开始设置 `vllm` 和 `vllm-ascend`。" -#: ../../installation.md:140 +#: ../../source/installation.md:121 +msgid "Set up using Python" +msgstr "使用 Python 设置" + +#: ../../source/installation.md:123 +msgid "First, install system dependencies and configure the pip mirror:" +msgstr "首先,安装系统依赖项并配置 pip 镜像:" + +#: ../../source/installation.md:135 msgid "" -"Then you can install `vllm` and `vllm-ascend` from **pre-built wheel**:" -msgstr "然后你可以从**预编译的 wheel 包**安装 `vllm` 和 `vllm-ascend`:" +"**[Optional]** Then configure the extra-index of `pip` if you are working" +" on an x86 machine or using torch-npu dev version:" +msgstr "**[可选]** 如果您在 x86 机器上工作或使用 torch-npu 开发版本,请配置 `pip` 的额外索引:" -#: ../../installation.md +#: ../../source/installation.md:142 +msgid "Then you can install `vllm` and `vllm-ascend` from a **pre-built wheel**:" +msgstr "然后,您可以从 **预构建的 wheel 包** 安装 `vllm` 和 `vllm-ascend`:" + +#: ../../source/installation.md msgid "Click here to see \"Build from source code\"" msgstr "点击此处查看“从源代码构建”" -#: ../../installation.md:153 +#: ../../source/installation.md:155 msgid "or build from **source code**:" -msgstr "或者从**源代码**构建:" +msgstr "或从 **源代码** 构建:" -#: ../../installation.md:171 +#: ../../source/installation.md:174 msgid "" -"vllm-ascend will build custom ops by default. If you don't want to build " -"it, set `COMPILE_CUSTOM_KERNELS=0` environment to disable it." -msgstr "" -"vllm-ascend 默认会编译自定义算子。如果你不想编译它,可以设置环境变量 " -"`COMPILE_CUSTOM_KERNELS=0` 来禁用。" +"If you are building custom operators for Atlas A3, you should run `git " +"submodule update --init --recursive` manually, or ensure your environment" +" has internet access." +msgstr "如果您正在为 Atlas A3 构建自定义算子,您应该手动运行 `git submodule update --init --recursive`,或确保您的环境可以访问互联网。" -#: ../../installation.md:175 +#: ../../source/installation.md:178 msgid "" -"If you are building from v0.7.3-dev and intend to use sleep mode feature, " -"you should set `COMPILE_CUSTOM_KERNELS=1` manually. To build custom ops, " -"gcc/g++ higher than 8 and c++ 17 or higher is required. If you're using " -"`pip install -e .` and encourage a torch-npu version conflict, please " -"install with `pip install --no-build-isolation -e .` to build on system " -"env. 
If you encounter other problems during compiling, it is probably " -"because unexpected compiler is being used, you may export `CXX_COMPILER` " -"and `C_COMPILER` in env to specify your g++ and gcc locations before " -"compiling." -msgstr "" -"如果你是从 v0.7.3-dev 版本开始构建,并且打算使用休眠模式功能,你需要手动设" -"置 `COMPILE_CUSTOM_KERNELS=1`。构建自定义算子时,要求 gcc/g++ 版本高于 8 且" -"支持 c++ 17 或更高标准。如果你正在使用 `pip install -e .` 并且出现了 torch-" -"npu 版本冲突,请使用 `pip install --no-build-isolation -e .` 在系统环境下进" -"行安装。如果在编译过程中遇到其它问题,可能是因为使用了非预期的编译器,你可以" -"在编译前通过环境变量导出 `CXX_COMPILER` 和 `C_COMPILER`,以指定你的 g++ 和 " -"gcc 路径。" +"To build custom operators, gcc/g++ higher than 8 and C++17 or higher are " +"required. If you are using `pip install -e .` and encounter a torch-npu " +"version conflict, please install with `pip install --no-build-isolation " +"-e .` to build on system env. If you encounter other problems during " +"compiling, it is probably because an unexpected compiler is being used, " +"you may export `CXX_COMPILER` and `C_COMPILER` in the environment to " +"specify your g++ and gcc locations before compiling." +msgstr "构建自定义算子需要 gcc/g++ 版本高于 8 且支持 C++17 或更高标准。如果您使用 `pip install -e .` 并遇到 torch-npu 版本冲突,请使用 `pip install --no-build-isolation -e .` 在系统环境中进行安装。如果在编译过程中遇到其他问题,可能是因为使用了非预期的编译器,您可以在编译前通过环境变量导出 `CXX_COMPILER` 和 `C_COMPILER` 来指定您的 g++ 和 gcc 路径。" -#: ../../installation.md -msgid "Using docker" -msgstr "使用 docker" +#: ../../source/installation.md:181 +msgid "" +"If you are building in a CPU-only environment where `npu-smi` is " +"unavailable, you need to set `SOC_VERSION` before `pip install -e .` so " +"the build can target the correct chip. You can refer to `Dockerfile*` " +"defaults, for example:" +msgstr "如果您在仅 CPU 的环境中构建,且 `npu-smi` 不可用,则需要在 `pip install -e .` 之前设置 `SOC_VERSION`,以便构建过程能针对正确的芯片。您可以参考 `Dockerfile*` 的默认值,例如:" -#: ../../installation.md:184 -msgid "You can just pull the **prebuilt image** and run it with bash." -msgstr "你可以直接拉取**预构建镜像**并用 bash 运行它。" +#: ../../source/installation.md:183 +msgid "Atlas A2: `export SOC_VERSION=ascend910b1`" +msgstr "Atlas A2:`export SOC_VERSION=ascend910b1`" -#: ../../installation.md +#: ../../source/installation.md:184 +msgid "Atlas A3: `export SOC_VERSION=ascend910_9391`" +msgstr "Atlas A3:`export SOC_VERSION=ascend910_9391`" + +#: ../../source/installation.md:185 +msgid "Atlas 300I: `export SOC_VERSION=ascend310p1`" +msgstr "Atlas 300I:`export SOC_VERSION=ascend310p1`" + +#: ../../source/installation.md:186 +msgid "Atlas A5: `export SOC_VERSION=`" +msgstr "Atlas A5:`export SOC_VERSION=<以 \"ascend950\" 开头的值>`" + +#: ../../source/installation.md:189 +msgid "Set up using Docker" +msgstr "使用 Docker 设置" + +#: ../../source/installation.md:191 +msgid "" +"`vllm-ascend` offers Docker images for deployment. You can just pull the " +"**prebuilt image** from the image repository [ascend/vllm-" +"ascend](https://quay.io/repository/ascend/vllm-ascend?tab=tags) and run " +"it with bash." +msgstr "`vllm-ascend` 提供用于部署的 Docker 镜像。您可以直接从镜像仓库 [ascend/vllm-ascend](https://quay.io/repository/ascend/vllm-ascend?tab=tags) 拉取 **预构建镜像** 并使用 bash 运行。" + +#: ../../source/installation.md:193 +msgid "Supported images as following." 
+msgstr "支持的镜像如下。" + +#: ../../source/installation.md:177 +msgid "image name" +msgstr "镜像名称" + +#: ../../source/installation.md:177 +msgid "Hardware" +msgstr "硬件" + +#: ../../source/installation.md:177 +msgid "OS" +msgstr "操作系统" + +#: ../../source/installation.md:177 +msgid "vllm-ascend:{{ vllm_ascend_version }}" +msgstr "vllm-ascend:{{ vllm_ascend_version }}" + +#: ../../source/installation.md:177 +msgid "Atlas A2" +msgstr "Atlas A2" + +#: ../../source/installation.md:177 +msgid "Ubuntu" +msgstr "Ubuntu" + +#: ../../source/installation.md:177 +msgid "vllm-ascend:{{ vllm_ascend_version }}-openeuler" +msgstr "vllm-ascend:{{ vllm_ascend_version }}-openeuler" + +#: ../../source/installation.md:177 +msgid "openEuler" +msgstr "openEuler" + +#: ../../source/installation.md:177 +msgid "vllm-ascend:{{ vllm_ascend_version }}-a3" +msgstr "vllm-ascend:{{ vllm_ascend_version }}-a3" + +#: ../../source/installation.md:177 +msgid "Atlas A3" +msgstr "Atlas A3" + +#: ../../source/installation.md:177 +msgid "vllm-ascend:{{ vllm_ascend_version }}-a3-openeuler" +msgstr "vllm-ascend:{{ vllm_ascend_version }}-a3-openeuler" + +#: ../../source/installation.md:177 +msgid "vllm-ascend:{{ vllm_ascend_version }}-310p" +msgstr "vllm-ascend:{{ vllm_ascend_version }}-310p" + +#: ../../source/installation.md:177 +msgid "Atlas 300I" +msgstr "Atlas 300I" + +#: ../../source/installation.md:177 +msgid "vllm-ascend:{{ vllm_ascend_version }}-310p-openeuler" +msgstr "vllm-ascend:{{ vllm_ascend_version }}-310p-openeuler" + +#: ../../source/installation.md msgid "Click here to see \"Build from Dockerfile\"" msgstr "点击这里查看“从 Dockerfile 构建”" -#: ../../installation.md:187 +#: ../../source/installation.md:205 msgid "or build IMAGE from **source code**:" msgstr "或从**源代码**构建 IMAGE:" -#: ../../installation.md:218 +#: ../../source/installation.md:247 msgid "" -"The default workdir is `/workspace`, vLLM and vLLM Ascend code are placed " -"in `/vllm-workspace` and installed in [development mode](https://setuptools." -"pypa.io/en/latest/userguide/development_mode.html)(`pip install -e`) to " -"help developer immediately take place changes without requiring a new " -"installation." +"The default workdir is `/workspace`, vLLM and vLLM Ascend code are placed" +" in `/vllm-workspace` and installed in [development " +"mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html)" +" (`pip install -e`) to help developer immediately take place changes " +"without requiring a new installation." msgstr "" -"默认的工作目录是 `/workspace`,vLLM 和 vLLM Ascend 代码被放置在 `/vllm-" -"workspace`,并以[开发模式](https://setuptools.pypa.io/en/latest/userguide/" -"development_mode.html)(`pip install -e`)安装,以便开发者能够即时生效更改," -"而无需重新安装。" +"默认工作目录为 `/workspace`,vLLM 和 vLLM Ascend 代码位于 `/vllm-workspace`" +" 目录下,并以[开发模式](https://setuptools.pypa.io/en/latest/userguide/development_mode.html)(`pip" +" install -e`)安装,以便开发者能够即时应用更改,而无需重新安装。" -#: ../../installation.md:222 +#: ../../source/installation.md:249 msgid "Extra information" msgstr "额外信息" -#: ../../installation.md:224 +#: ../../source/installation.md:251 msgid "Verify installation" msgstr "验证安装" -#: ../../installation.md:226 +#: ../../source/installation.md:253 msgid "Create and run a simple inference test. 
The `example.py` can be like:" -msgstr "创建并运行一个简单的推理测试。`example.py` 可以如下:" +msgstr "创建并运行一个简单的推理测试。`example.py` 内容示例如下:" -#: ../../installation.md:251 +#: ../../source/installation.md:278 msgid "Then run:" msgstr "然后运行:" -#: ../../installation.md:259 +#: ../../source/installation.md:284 +msgid "" +"If you encounter a connection error with Hugging Face (e.g., `We couldn't" +" connect to 'https://huggingface.co' to load the files, and couldn't find" +" them in the cached files.`), run the following commands to use " +"ModelScope as an alternative:" +msgstr "" +"如果遇到 Hugging Face 连接错误(例如:`We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.`),请运行以下命令以使用 ModelScope 作为替代方案:" + +#: ../../source/installation.md:292 msgid "The output will be like:" -msgstr "输出将会像这样:" +msgstr "输出示例如下:" + +#: ../../source/installation.md:316 +msgid "Multi-node Deployment" +msgstr "多节点部署" + +#: ../../source/installation.md:318 +msgid "Verify Multi-Node Communication" +msgstr "验证多节点通信" + +#: ../../source/installation.md:320 +msgid "" +"First, check physical layer connectivity, then verify each node, and " +"finally verify the inter-node connectivity." +msgstr "首先,检查物理层连通性,然后验证每个节点,最后验证节点间连通性。" + +#: ../../source/installation.md:322 +msgid "Physical Layer Requirements" +msgstr "物理层要求" + +#: ../../source/installation.md:324 +msgid "" +"The physical machines must be located on the same WLAN, with network " +"connectivity." +msgstr "物理机必须位于同一无线局域网(WLAN)内,并具备网络连通性。" + +#: ../../source/installation.md:325 +msgid "" +"All NPUs are connected with optical modules, and the connection status " +"must be normal." +msgstr "所有 NPU 均通过光模块连接,且连接状态必须正常。" + +#: ../../source/installation.md:327 +msgid "Each Node Verification" +msgstr "单节点验证" + +#: ../../source/installation.md:329 +msgid "" +"Execute the following commands on each node in sequence. The results must" +" all be `success` and the status must be `UP`:" +msgstr "在每个节点上依次执行以下命令。所有结果必须为 `success`,状态必须为 `UP`:" + +#: ../../source/installation.md +msgid "A2 series" +msgstr "A2 系列" + +#: ../../source/installation.md +msgid "A3 series" +msgstr "A3 系列" + +#: ../../source/installation.md:374 +msgid "Interconnect Verification" +msgstr "互连验证" + +#: ../../source/installation.md:376 +msgid "1. Get NPU IP Addresses" +msgstr "1. 获取 NPU IP 地址" + +#: ../../source/installation.md:399 +msgid "2. Cross-Node PING Test" +msgstr "2. 跨节点 PING 测试" + +#: ../../source/installation.md:406 +msgid "Run Container In Each Node" +msgstr "在每个节点中运行容器" + +#: ../../source/installation.md:408 +msgid "" +"Using vLLM-ascend official container is more efficient to run multi-node " +"environment." +msgstr "使用 vLLM-ascend 官方容器运行多节点环境更为高效。" + +#: ../../source/installation.md:410 +msgid "" +"Run the following command to start the container in each node (You should" +" download the weight to /root/.cache in advance):" +msgstr "在每个节点中运行以下命令以启动容器(您应提前将权重下载到 /root/.cache 目录):" + +#~ msgid ">= 8.1.RC1" +#~ msgstr ">= 8.1.RC1" + +#~ msgid ">= 2.5.1.post1.dev20250619" +#~ msgstr ">= 2.5.1.post1.dev20250619" + +#~ msgid "You have 2 way to install:" +#~ msgstr "您有两种安装方式:" + +#~ msgid "Setup vllm and vllm-ascend" +#~ msgstr "安装 vllm 和 vllm-ascend" + +#~ msgid "Using pip" +#~ msgstr "使用 pip" + +#~ msgid "" +#~ "vllm-ascend will build custom ops " +#~ "by default. If you don't want to" +#~ " build it, set `COMPILE_CUSTOM_KERNELS=0` " +#~ "environment to disable it." 
+#~ msgstr "" +#~ "vllm-ascend 默认会编译自定义算子。如果您不想编译它,可以设置环境变量 " +#~ "`COMPILE_CUSTOM_KERNELS=0` 来禁用。" + +#~ msgid "You can just pull the **prebuilt image** and run it with bash." +#~ msgstr "您可以直接拉取**预构建镜像**并用 bash 运行它。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po b/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po index ebe66fdd..9dbb3403 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po @@ -4,146 +4,150 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: 2025-07-18 10:09+0800\n" "Last-Translator: \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" -"X-Generator: Poedit 3.5\n" +"Generated-By: Babel 2.18.0\n" -#: ../../quick_start.md:1 +#: ../../source/quick_start.md:1 msgid "Quickstart" msgstr "快速入门" -#: ../../quick_start.md:3 +#: ../../source/quick_start.md:3 msgid "Prerequisites" msgstr "先决条件" -#: ../../quick_start.md:5 +#: ../../source/quick_start.md:5 msgid "Supported Devices" msgstr "支持的设备" -#: ../../quick_start.md:6 +#: ../../source/quick_start.md:7 msgid "" -"Atlas A2 Training series (Atlas 800T A2, Atlas 900 A2 PoD, Atlas 200T A2 " +"Atlas A2 training series (Atlas 800T A2, Atlas 900 A2 PoD, Atlas 200T A2 " "Box16, Atlas 300T A2)" msgstr "" -"Atlas A2 训练系列(Atlas 800T A2,Atlas 900 A2 PoD,Atlas 200T A2 Box16," -"Atlas 300T A2)" +"Atlas A2 训练系列(Atlas 800T A2、Atlas 900 A2 PoD、Atlas 200T A2 Box16、Atlas " +"300T A2)" -#: ../../quick_start.md:7 -msgid "Atlas 800I A2 Inference series (Atlas 800I A2)" +#: ../../source/quick_start.md:8 +msgid "Atlas 800I A2 inference series (Atlas 800I A2)" msgstr "Atlas 800I A2 推理系列(Atlas 800I A2)" -#: ../../quick_start.md:9 +#: ../../source/quick_start.md:9 +msgid "" +"Atlas A3 training series (Atlas 800T A3, Atlas 900 A3 SuperPoD, Atlas " +"9000 A3 SuperPoD)" +msgstr "" +"Atlas A3 训练系列(Atlas 800T A3、Atlas 900 A3 SuperPoD、Atlas 9000 A3 SuperPoD)" + +#: ../../source/quick_start.md:10 +msgid "Atlas 800I A3 inference series (Atlas 800I A3)" +msgstr "Atlas 800I A3 推理系列(Atlas 800I A3)" + +#: ../../source/quick_start.md:11 +msgid "[Experimental] Atlas 300I inference series (Atlas 300I Duo)" +msgstr "[实验性] Atlas 300I 推理系列(Atlas 300I Duo)" + +#: ../../source/quick_start.md:13 msgid "Setup environment using container" msgstr "使用容器设置环境" -#: ../../quick_start.md +#: ../../source/quick_start.md msgid "Ubuntu" msgstr "Ubuntu" -#: ../../quick_start.md +#: ../../source/quick_start.md msgid "openEuler" msgstr "openEuler" -#: ../../quick_start.md:69 +#: ../../source/quick_start.md:85 msgid "" -"The default workdir is `/workspace`, vLLM and vLLM Ascend code are placed " -"in `/vllm-workspace` and installed in [development mode](https://setuptools." -"pypa.io/en/latest/userguide/development_mode.html)(`pip install -e`) to " -"help developer immediately take place changes without requiring a new " -"installation." 
+"The default workdir is `/workspace`, vLLM and vLLM Ascend code are placed" +" in `/vllm-workspace` and installed in [development " +"mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html)" +" (`pip install -e`) to help developers make changes effective immediately" +" without requiring a new installation." msgstr "" -"默认的工作目录是 `/workspace`,vLLM 和 vLLM Ascend 代码被放置在 `/vllm-" -"workspace`,并以[开发模式](https://setuptools.pypa.io/en/latest/userguide/" -"development_mode.html)(`pip install -e`)安装,以便开发者能够即时生效更改," -"而无需重新安装。" +"默认工作目录为 `/workspace`,vLLM 和 vLLM Ascend 代码位于 `/vllm-workspace` 目录下,并以[开发模式](https://setuptools.pypa.io/en/latest/userguide/development_mode.html)(`pip install -e`)安装,以便开发者能够即时生效更改,而无需重新安装。" -#: ../../quick_start.md:71 +#: ../../source/quick_start.md:87 msgid "Usage" msgstr "用法" -#: ../../quick_start.md:73 -msgid "You can use Modelscope mirror to speed up download:" -msgstr "你可以使用 Modelscope 镜像来加速下载:" +#: ../../source/quick_start.md:89 +msgid "You can use ModelScope mirror to speed up download:" +msgstr "您可以使用 ModelScope 镜像来加速下载:" -#: ../../quick_start.md:80 +#: ../../source/quick_start.md:97 msgid "There are two ways to start vLLM on Ascend NPU:" msgstr "在昇腾 NPU 上启动 vLLM 有两种方式:" -#: ../../quick_start.md +#: ../../source/quick_start.md msgid "Offline Batched Inference" msgstr "离线批量推理" -#: ../../quick_start.md:86 +#: ../../source/quick_start.md:103 msgid "" "With vLLM installed, you can start generating texts for list of input " -"prompts (i.e. offline batch inferencing)." -msgstr "" -"安装了 vLLM 后,您可以开始为一系列输入提示生成文本(即离线批量推理)。" +"prompts (i.e. offline batch inference)." +msgstr "安装 vLLM 后,您可以开始为一系列输入提示生成文本(即离线批量推理)。" -#: ../../quick_start.md:88 +#: ../../source/quick_start.md:105 msgid "" -"Try to run below Python script directly or use `python3` shell to generate " -"texts:" -msgstr "" -"尝试直接运行下面的 Python 脚本,或者使用 `python3` 交互式命令行来生成文本:" +"Try to run below Python script directly or use `python3` shell to " +"generate texts:" +msgstr "尝试直接运行下面的 Python 脚本,或者使用 `python3` 交互式环境来生成文本:" -#: ../../quick_start.md +#: ../../source/quick_start.md msgid "OpenAI Completions API" msgstr "OpenAI Completions API" -#: ../../quick_start.md:114 +#: ../../source/quick_start.md:132 msgid "" "vLLM can also be deployed as a server that implements the OpenAI API " -"protocol. Run the following command to start the vLLM server with the [Qwen/" -"Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) " -"model:" +"protocol. Run the following command to start the vLLM server with the " +"[Qwen/Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B) model:" msgstr "" -"vLLM 也可以作为实现 OpenAI API 协议的服务器进行部署。运行以下命令,使用 " -"[Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-" -"Instruct) 模型启动 vLLM 服务器:" +"vLLM 也可以部署为实现 OpenAI API 协议的服务器。运行以下命令,使用 [Qwen/Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B) 模型启动 vLLM 服务器:" -#: ../../quick_start.md:124 -msgid "If you see log as below:" -msgstr "如果你看到如下日志:" +#: ../../source/quick_start.md:143 +msgid "If you see a log as below:" +msgstr "如果您看到如下日志:" -#: ../../quick_start.md:132 +#: ../../source/quick_start.md:152 msgid "Congratulations, you have successfully started the vLLM server!" -msgstr "恭喜,你已经成功启动了 vLLM 服务器!" +msgstr "恭喜,您已成功启动 vLLM 服务器!" 
-#: ../../quick_start.md:134 -msgid "You can query the list the models:" -msgstr "你可以查询模型列表:" +#: ../../source/quick_start.md:154 +msgid "You can query the list of models:" +msgstr "您可以查询模型列表:" -#: ../../quick_start.md:141 +#: ../../source/quick_start.md:162 msgid "You can also query the model with input prompts:" -msgstr "你也可以通过输入提示来查询模型:" +msgstr "您也可以通过输入提示来查询模型:" -#: ../../quick_start.md:155 +#: ../../source/quick_start.md:177 msgid "" -"vLLM is serving as background process, you can use `kill -2 $VLLM_PID` to " -"stop the background process gracefully, it's equal to `Ctrl-C` to stop " -"foreground vLLM process:" +"vLLM is serving as a background process, you can use `kill -2 $VLLM_PID` " +"to stop the background process gracefully, which is similar to `Ctrl-C` " +"for stopping the foreground vLLM process:" msgstr "" -"vLLM 正作为后台进程运行,你可以使用 `kill -2 $VLLM_PID` 来优雅地停止后台进" -"程,这等同于使用 `Ctrl-C` 停止前台 vLLM 进程:" +"vLLM 正作为后台进程运行,您可以使用 `kill -2 $VLLM_PID` 来优雅地停止后台进程,这类似于使用 `Ctrl-C` 停止前台 vLLM 进程:" -#: ../../quick_start.md:164 -msgid "You will see output as below:" -msgstr "你将会看到如下输出:" +#: ../../source/quick_start.md:186 +msgid "The output is as below:" +msgstr "输出如下:" -#: ../../quick_start.md:172 -msgid "Finally, you can exit container by using `ctrl-D`." -msgstr "最后,你可以通过按 `ctrl-D` 退出容器。" +#: ../../source/quick_start.md:195 +msgid "Finally, you can exit the container by using `ctrl-D`." +msgstr "最后,您可以通过按 `ctrl-D` 退出容器。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/index.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/index.po new file mode 100644 index 00000000..ad363e48 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/index.po @@ -0,0 +1,29 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/features/index.md:1 +#: ../../source/tutorials/features/index.md:5 +msgid "Feature Tutorials" +msgstr "功能教程" + +#: ../../source/tutorials/features/index.md:3 +msgid "This section provides tutorials for different features of vLLM Ascend." +msgstr "本节提供 vLLM Ascend 不同功能的使用教程。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po new file mode 100644 index 00000000..071513e3 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po @@ -0,0 +1,447 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:1 +msgid "Long-Sequence Context Parallel (Deepseek)" +msgstr "长序列上下文并行 (Deepseek)" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:3 +msgid "Getting Started" +msgstr "快速开始" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:6 +msgid "" +"Context parallel feature currently is only supported on Atlas A3 device, " +"and will be supported on Atlas A2 in the future." +msgstr "上下文并行特性目前仅在 Atlas A3 设备上受支持,未来将在 Atlas A2 上提供支持。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:9 +msgid "" +"vLLM-Ascend now supports long sequence with context parallel options. " +"This guide takes one-by-one steps to verify these features with " +"constrained resources." +msgstr "vLLM-Ascend 现已支持长序列上下文并行选项。本指南将逐步引导您在有限资源下验证这些功能。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:11 +msgid "" +"Take the Deepseek-V3.1-w8a8 model as an example, use 3 Atlas 800T A3 " +"servers to deploy the “1P1D” architecture. Node p is deployed across " +"multiple machines, while node d is deployed on a single machine. Assume " +"the IP of the prefiller server is 192.0.0.1 (prefill 1) and 192.0.0.2 " +"(prefill 2), and the decoder servers are 192.0.0.3 (decoder 1). On each " +"server, use 8 NPUs 16 chips to deploy one service instance. In the " +"current example, we will enable the context parallel feature on node p to" +" improve TTFT. Although enabling the DCP feature on node d can reduce " +"memory usage, it would introduce additional communication and small " +"operator overhead. Therefore, we will not enable the DCP feature on node " +"d." +msgstr "以 Deepseek-V3.1-w8a8 模型为例,使用 3 台 Atlas 800T A3 服务器部署“1P1D”架构。节点 p 跨多台机器部署,而节点 d 部署在单台机器上。假设预填充服务器的 IP 为 192.0.0.1(预填充 1)和 192.0.0.2(预填充 2),解码器服务器为 192.0.0.3(解码器 1)。每台服务器使用 8 个 NPU(16 个芯片)部署一个服务实例。在当前示例中,我们将在节点 p 上启用上下文并行特性以改善 TTFT。虽然在节点 d 上启用 DCP 特性可以减少内存使用,但会引入额外的通信和小算子开销。因此,我们不会在节点 d 上启用 DCP 特性。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:13 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:15 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:17 +msgid "" +"`DeepSeek-V3.1_w8a8mix_mtp` (Quantized version with mix mtp): [Download " +"model weight](https://www.modelscope.cn/models/Eco-" +"Tech/DeepSeek-V3.1-w8a8). Please modify `torch_dtype` from `float16` to " +"`bfloat16` in `config.json`." 
+msgstr "`DeepSeek-V3.1_w8a8mix_mtp`(混合 MTP 量化版本):[下载模型权重](https://www.modelscope.cn/models/Eco-Tech/DeepSeek-V3.1-w8a8)。请在 `config.json` 中将 `torch_dtype` 从 `float16` 修改为 `bfloat16`。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:19 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:21 +msgid "Verify Multi-node Communication" +msgstr "验证多节点通信" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:23 +msgid "" +"Refer to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication) to " +"verify multi-node communication." +msgstr "请参考[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:25 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:27 +msgid "You can use our official Docker image to run `DeepSeek-V3.1` directly." +msgstr "您可以使用我们的官方 Docker 镜像直接运行 `DeepSeek-V3.1`。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:29 +msgid "" +"Select an image based on your machine type and start the Docker image on " +"your node, refer to [using Docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考[使用 Docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:64 +msgid "You need to set up environment on each node." +msgstr "您需要在每个节点上设置环境。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:66 +msgid "Prefiller/Decoder Deployment" +msgstr "预填充器/解码器部署" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:68 +msgid "" +"We can run the following scripts to launch a server on the " +"prefiller/decoder node, respectively. Please note that each P/D node will" +" occupy ports ranging from kv_port to kv_port + num_chips to initialize " +"socket listeners. To avoid any issues, port conflicts should be " +"prevented. Additionally, ensure that each node's engine_id is uniquely " +"assigned to avoid conflicts." +msgstr "我们可以分别在预填充器/解码器节点上运行以下脚本来启动服务器。请注意,每个 P/D 节点将占用从 kv_port 到 kv_port + num_chips 的端口范围来初始化 socket 监听器。为避免任何问题,应防止端口冲突。此外,请确保每个节点的 engine_id 被唯一分配以避免冲突。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:70 +msgid "" +"Run the following script to execute online 128k inference on three nodes " +"respectively." 
+msgstr "运行以下脚本,分别在三个节点上执行在线 128k 推理。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md +msgid "Prefiller node 1" +msgstr "预填充节点 1" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md +msgid "Prefiller node 2" +msgstr "预填充节点 2" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md +msgid "Decoder node 1" +msgstr "解码节点 1" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:276 +msgid "Prefill master node `proxy.sh` script" +msgstr "预填充主节点 `proxy.sh` 脚本" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:292 +msgid "Run proxy" +msgstr "运行代理" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:294 +msgid "" +"Run a proxy server on the same node with the prefiller service instance. " +"You can get the proxy program in the repository's examples: " +"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "在与预填充服务实例相同的节点上运行代理服务器。您可以在仓库的示例中找到代理程序:[load_balance_proxy_server_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:301 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数解释如下:" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:304 +msgid "" +"`--tensor-parallel-size` 16 are common settings for tensor parallelism " +"(TP) sizes." +msgstr "`--tensor-parallel-size` 16 是张量并行(TP)大小的常见设置。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:305 +msgid "" +"`--prefill-context-parallel-size` 2 are common settings for prefill " +"context parallelism (PCP) sizes." +msgstr "`--prefill-context-parallel-size` 2 是预填充上下文并行(PCP)大小的常见设置。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:306 +msgid "" +"`--decode-context-parallel-size` 8 are common settings for decode context" +" parallelism (DCP) sizes." +msgstr "`--decode-context-parallel-size` 8 是解码上下文并行(DCP)大小的常见设置。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:307 +msgid "" +"`--max-model-len` represents the context length, which is the maximum " +"value of the input plus output for a single request." +msgstr "`--max-model-len` 表示上下文长度,即单个请求的输入加输出的最大值。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:308 +msgid "" +"`--max-num-seqs` indicates the maximum number of requests that each DP " +"group is allowed to process. If the number of requests sent to the " +"service exceeds this limit, the excess requests will remain in a waiting " +"state and will not be scheduled. Note that the time spent in the waiting " +"state is also counted in metrics such as TTFT and TPOT. Therefore, when " +"testing performance, it is generally recommended that `--max-num-seqs` * " +"`--data-parallel-size` >= the actual total concurrency." 
+msgstr "`--max-num-seqs` 表示每个 DP 组允许处理的最大请求数。如果发送到服务的请求数量超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:309 +msgid "" +"`--max-num-batched-tokens` represents the maximum number of tokens that " +"the model can process in a single step. Currently, vLLM v1 scheduling " +"enables ChunkPrefill/SplitFuse by default, which means:" +msgstr "`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 ChunkPrefill/SplitFuse,这意味着:" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:310 +msgid "" +"(1) If the input length of a request is greater than `--max-num-batched-" +"tokens`, it will be divided into multiple rounds of computation according" +" to `--max-num-batched-tokens`;" +msgstr "(1)如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:311 +msgid "" +"(2) Decode requests are prioritized for scheduling, and prefill requests " +"are scheduled only if there is available capacity." +msgstr "(2)解码请求优先调度,预填充请求仅在有空闲容量时才会被调度。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:312 +msgid "" +"Generally, if `--max-num-batched-tokens` is set to a larger value, the " +"overall latency will be lower, but the pressure on GPU memory (activation" +" value usage) will be greater." +msgstr "通常,如果 `--max-num-batched-tokens` 设置得较大,整体延迟会更低,但 GPU 内存(激活值使用)的压力会更大。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:313 +msgid "" +"`--gpu-memory-utilization` represents the proportion of HBM that vLLM " +"will use for actual inference. Its essential function is to calculate the" +" available kv_cache size. During the warm-up phase (referred to as " +"profile run in vLLM), vLLM records the peak GPU memory usage during an " +"inference process with an input size of `--max-num-batched-tokens`. The " +"available kv_cache size is then calculated as: `--gpu-memory-utilization`" +" * HBM size - peak GPU memory usage. Therefore, the larger the value of " +"`--gpu-memory-utilization`, the more kv_cache can be used. However, since" +" the GPU memory usage during the warm-up phase may differ from that " +"during actual inference (e.g., due to uneven EP load), setting `--gpu-" +"memory-utilization` too high may lead to OOM (Out of Memory) issues " +"during actual inference. The default value is `0.9`." +msgstr "`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache 大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache 就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理期间不同(例如,由于 EP 负载不均),将 `--gpu-memory-utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:314 +msgid "" +"`--enable-expert-parallel` indicates that EP is enabled. Note that vLLM " +"does not support a mixed approach of ETP and EP; that is, MoE can either " +"use pure EP or pure TP." +msgstr "`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE 只能使用纯 EP 或纯 TP。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:315 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. 
" +"To enable it, remove this option." +msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,请移除此选项。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:316 +msgid "" +"`--quantization` \"ascend\" indicates that quantization is used. To " +"disable quantization, remove this option." +msgstr "`--quantization` \"ascend\" 表示使用了量化。要禁用量化,请移除此选项。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:317 +msgid "" +"`--compilation-config` contains configurations related to the aclgraph " +"graph mode. The most significant configurations are \"cudagraph_mode\" " +"and \"cudagraph_capture_sizes\", which have the following meanings: " +"\"cudagraph_mode\": represents the specific graph mode. Currently, " +"\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " +"mainly used to reduce the cost of operator dispatch. Currently, " +"\"FULL_DECODE_ONLY\" is recommended." +msgstr "`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:319 +msgid "" +"\"cudagraph_capture_sizes\": represents different levels of graph modes. " +"The default value is [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]. " +"In the graph mode, the input for graphs at different levels is fixed, and" +" inputs between levels are automatically padded to the next level. " +"Currently, the default setting is recommended. Only in some scenarios is " +"it necessary to set this separately to achieve optimal performance." +msgstr "\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一级别。目前推荐使用默认设置。仅在部分场景中,需要单独设置此参数以达到最佳性能。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:320 +msgid "" +"`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` indicates that Flashcomm1 " +"optimization is enabled. Currently, this optimization is only supported " +"for MoE in scenarios where tensor-parallel-size > 1." +msgstr "`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` 表示启用了 Flashcomm1 优化。目前,此优化仅在 tensor-parallel-size > 1 的场景下对 MoE 提供支持。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:321 +msgid "" +"`export VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL=1` indicates that context " +"parallel is enabled. This environment variable is required in the PD " +"architecture but not needed in the PD co-locate deployment scenario. It " +"will be removed in the future." +msgstr "`export VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL=1` 表示启用了上下文并行。此环境变量在 PD 架构中是必需的,但在 PD 共置部署场景中不需要。未来将被移除。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:323 +msgid "**Notice:**" +msgstr "**注意:**" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:325 +msgid "" +"tensor-parallel-size needs to be divisible by decode-context-parallel-" +"size." +msgstr "tensor-parallel-size 需要能被 decode-context-parallel-size 整除。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:326 +msgid "" +"decode-context-parallel-size must be less than or equal to tensor-" +"parallel-size." 
+msgstr "decode-context-parallel-size 必须小于或等于 tensor-parallel-size。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:328 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:330 +msgid "Here are two accuracy evaluation methods." +msgstr "以下是两种精度评估方法。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:332 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:344 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:334 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:336 +msgid "" +"After execution, you can get the result, here is the result of " +"`DeepSeek-V3.1-w8a8` for reference only." +msgstr "执行后,您可以获得结果,以下是 `DeepSeek-V3.1-w8a8` 的结果,仅供参考。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "aime2024" +msgstr "aime2024" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "86.67" +msgstr "86.67" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:342 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:346 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参阅[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:348 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM 基准测试" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:350 +msgid "Run performance evaluation of `DeepSeek-V3.1-w8a8` as an example." +msgstr "以运行 `DeepSeek-V3.1-w8a8` 的性能评估为例。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:352 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." 
+msgstr "更多详情请参阅 [vllm 基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:354 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 包含三个子命令:" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:356 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:357 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:358 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:360 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例,按如下方式运行代码。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:367 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您将获得性能评估结果。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "ttft" +msgstr "首字元延迟" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "random" +msgstr "随机" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "performance" +msgstr "性能" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "perf" +msgstr "性能" + +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:211 +msgid "20.7s" +msgstr "20.7秒" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po new file mode 100644 index 00000000..159625fa --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po @@ -0,0 +1,386 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:1 +msgid "Long-Sequence Context Parallel (Qwen3-235B-A22B)" +msgstr "长序列上下文并行 (Qwen3-235B-A22B)" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:3 +msgid "Getting Started" +msgstr "快速开始" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:5 +msgid "" +"vLLM-Ascend now supports long-sequence context parallel. This guide takes" +" one-by-one steps to verify these features with constrained resources." 
+msgstr "vLLM-Ascend 现已支持长序列上下文并行。本指南将引导您在使用有限资源的情况下,逐步验证这些功能。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:7 +msgid "" +"Using the `Qwen3-235B-A22B-w8a8` (Quantized version) model as an example," +" use 1 Atlas 800 A3 (64G × 16) server to deploy the single node \"pd co-" +"locate\" architecture." +msgstr "以 `Qwen3-235B-A22B-w8a8`(量化版本)模型为例,使用 1 台 Atlas 800 A3(64G × 16)服务器部署单节点 \"pd co-locate\" 架构。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:9 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:11 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:13 +msgid "" +"`Qwen3-235B-A22B-w8a8` (Quantized version): requires 1 Atlas 800 A3 (64G " +"× 16) node. [Download model weight](https://modelscope.cn/models/vllm-" +"ascend/Qwen3-235B-A22B-W8A8)" +msgstr "`Qwen3-235B-A22B-w8a8`(量化版本):需要 1 个 Atlas 800 A3(64G × 16)节点。[下载模型权重](https://modelscope.cn/models/vllm-ascend/Qwen3-235B-A22B-W8A8)" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:15 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多节点的共享目录,例如 `/root/.cache/`" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:17 +msgid "Run with Docker" +msgstr "使用 Docker 运行" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:19 +msgid "Start a Docker container on each node." +msgstr "在每个节点上启动一个 Docker 容器。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:63 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:65 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:67 +msgid "" +"`Qwen3-235B-A22B-w8a8` can be deployed on 1 Atlas 800 A3(64G*16). " +"Quantized version needs to start with parameter `--quantization ascend`." +msgstr "`Qwen3-235B-A22B-w8a8` 可以部署在 1 台 Atlas 800 A3(64G*16)上。量化版本需要使用参数 `--quantization ascend` 启动。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:70 +msgid "Run the following script to execute online 128k inference." 
+msgstr "运行以下脚本以执行在线 128k 推理。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:106 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:131 +msgid "**Notice:**" +msgstr "**注意:**" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:108 +#, python-brace-format +msgid "" +"for vllm version below `v0.12.0` use parameter: `--rope_scaling " +"'{\"rope_type\":\"yarn\",\"factor\":4,\"original_max_position_embeddings\":32768}'" +" \\`" +msgstr "对于 vllm 版本低于 `v0.12.0`,使用参数:`--rope_scaling '{\"rope_type\":\"yarn\",\"factor\":4,\"original_max_position_embeddings\":32768}' \\`" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:109 +#, python-brace-format +msgid "" +"for vllm version `v0.12.0` use parameter: `--hf-overrides " +"'{\"rope_parameters\": " +"{\"rope_type\":\"yarn\",\"rope_theta\":1000000,\"factor\":4,\"original_max_position_embeddings\":32768}}'" +" \\`" +msgstr "对于 vllm 版本 `v0.12.0`,使用参数:`--hf-overrides '{\"rope_parameters\": {\"rope_type\":\"yarn\",\"rope_theta\":1000000,\"factor\":4,\"original_max_position_embeddings\":32768}}' \\`" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:111 +msgid "The parameters are explained as follows:" +msgstr "参数解释如下:" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:113 +msgid "" +"`--tensor-parallel-size` 8 are common settings for tensor parallelism " +"(TP) sizes." +msgstr "`--tensor-parallel-size` 8 是张量并行(TP)大小的常见设置。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:114 +msgid "" +"`--prefill-context-parallel-size` 2 are common settings for prefill " +"context parallelism (PCP) sizes." +msgstr "`--prefill-context-parallel-size` 2 是预填充上下文并行(PCP)大小的常见设置。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:115 +msgid "" +"`--decode-context-parallel-size` 2 are common settings for decode context" +" parallelism (DCP) sizes." +msgstr "`--decode-context-parallel-size` 2 是解码上下文并行(DCP)大小的常见设置。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:116 +msgid "" +"`--max-model-len` represents the context length, which is the maximum " +"value of the input plus output for a single request." +msgstr "`--max-model-len` 表示上下文长度,即单个请求的输入加输出的最大值。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:117 +msgid "" +"`--max-num-seqs` indicates the maximum number of requests that each DP " +"group is allowed to process. If the number of requests sent to the " +"service exceeds this limit, the excess requests will remain in a waiting " +"state and will not be scheduled. Note that the time spent in the waiting " +"state is also counted in metrics such as TTFT and TPOT. Therefore, when " +"testing performance, it is generally recommended that `--max-num-seqs` * " +"`--data-parallel-size` >= the actual total concurrency." +msgstr "`--max-num-seqs` 表示每个 DP 组允许处理的最大请求数。如果发送到服务的请求数量超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:118 +msgid "" +"`--max-num-batched-tokens` represents the maximum number of tokens that " +"the model can process in a single step. 
Currently, vLLM v1 scheduling " +"enables ChunkPrefill/SplitFuse by default, which means:" +msgstr "`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 ChunkPrefill/SplitFuse,这意味着:" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:119 +msgid "" +"(1) If the input length of a request is greater than `--max-num-batched-" +"tokens`, it will be divided into multiple rounds of computation according" +" to `--max-num-batched-tokens`;" +msgstr "(1)如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:120 +msgid "" +"(2) Decode requests are prioritized for scheduling, and prefill requests " +"are scheduled only if there is available capacity." +msgstr "(2)解码请求优先调度,预填充请求仅在有空闲容量时才会被调度。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:121 +msgid "" +"Generally, if `--max-num-batched-tokens` is set to a larger value, the " +"overall latency will be lower, but the pressure on GPU memory (activation" +" value usage) will be greater." +msgstr "通常,如果 `--max-num-batched-tokens` 设置得较大,整体延迟会更低,但 GPU 内存(激活值使用)的压力会更大。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:122 +msgid "" +"`--gpu-memory-utilization` represents the proportion of HBM that vLLM " +"will use for actual inference. Its essential function is to calculate the" +" available kv_cache size. During the warm-up phase (referred to as " +"profile run in vLLM), vLLM records the peak GPU memory usage during an " +"inference process with an input size of `--max-num-batched-tokens`. The " +"available kv_cache size is then calculated as: `--gpu-memory-utilization`" +" * HBM size - peak GPU memory usage. Therefore, the larger the value of " +"`--gpu-memory-utilization`, the more kv_cache can be used. However, since" +" the GPU memory usage during the warm-up phase may differ from that " +"during actual inference (e.g., due to uneven EP load), setting `--gpu-" +"memory-utilization` too high may lead to OOM (Out of Memory) issues " +"during actual inference. The default value is `0.9`." +msgstr "`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache 大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache 就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理时不同(例如,由于 EP 负载不均),将 `--gpu-memory-utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:123 +msgid "" +"`--enable-expert-parallel` indicates that EP is enabled. Note that vLLM " +"does not support a mixed approach of ETP and EP; that is, MoE can either " +"use pure EP or pure TP." +msgstr "`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE 要么使用纯 EP,要么使用纯 TP。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:124 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. " +"To enable it, remove this option." +msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,请移除此选项。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:125 +msgid "" +"`--quantization` \"ascend\" indicates that quantization is used. To " +"disable quantization, remove this option." 
+msgstr "`--quantization` \"ascend\" 表示使用了量化。要禁用量化,请移除此选项。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:126 +msgid "" +"`--compilation-config` contains configurations related to the aclgraph " +"graph mode. The most significant configurations are \"cudagraph_mode\" " +"and \"cudagraph_capture_sizes\", which have the following meanings: " +"\"cudagraph_mode\": represents the specific graph mode. Currently, " +"\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " +"mainly used to reduce the cost of operator dispatch. Currently, " +"\"FULL_DECODE_ONLY\" is recommended." +msgstr "`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示具体的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:128 +msgid "" +"\"cudagraph_capture_sizes\": represents different levels of graph modes. " +"The default value is [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]. " +"In the graph mode, the input for graphs at different levels is fixed, and" +" inputs between levels are automatically padded to the next level. " +"Currently, the default setting is recommended. Only in some scenarios is " +"it necessary to set this separately to achieve optimal performance." +msgstr "\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一个级别。目前推荐使用默认设置。仅在部分场景中,需要单独设置此参数以达到最佳性能。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:129 +msgid "" +"`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` indicates that Flashcomm1 " +"optimization is enabled. Currently, this optimization is only supported " +"for MoE in scenarios where tp_size > 1." +msgstr "`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` 表示启用了 Flashcomm1 优化。目前,此优化仅在 tp_size > 1 的场景下对 MoE 支持。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:133 +msgid "tp_size needs to be divisible by dcp_size" +msgstr "tp_size 需要能被 dcp_size 整除" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:134 +msgid "" +"decode context parallel size must be less than or equal to max_dcp_size, " +"where max_dcp_size = tensor_parallel_size // total_num_kv_heads." +msgstr "解码上下文并行大小必须小于或等于 max_dcp_size,其中 max_dcp_size = tensor_parallel_size // total_num_kv_heads。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:136 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:138 +msgid "Here are two accuracy evaluation methods." +msgstr "以下是两种精度评估方法。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:140 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:152 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:142 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参阅[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:144 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3" +"-235B-A22B-w8a8` for reference only." 
+msgstr "执行后,您可以获得结果,以下是 `Qwen3-235B-A22B-w8a8` 的结果,仅供参考。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "aime2024" +msgstr "aime2024" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "83.33" +msgstr "83.33" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:150 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:154 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参阅[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:156 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:158 +msgid "Run performance evaluation of `Qwen3-235B-A22B-w8a8` as an example." +msgstr "以运行 `Qwen3-235B-A22B-w8a8` 的性能评估为例。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:160 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "更多详情请参阅 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:162 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:164 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:165 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:166 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:168 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。运行代码如下。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:175 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." 
+msgstr "大约几分钟后,您将获得性能评估结果。" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "ttft" +msgstr "首词元时间" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "random" +msgstr "随机" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "performance" +msgstr "性能" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "perf" +msgstr "性能" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "17.36s" +msgstr "17.36秒" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_colocated_mooncake_multi_instance.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_colocated_mooncake_multi_instance.po new file mode 100644 index 00000000..e90435f4 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_colocated_mooncake_multi_instance.po @@ -0,0 +1,509 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:1 +msgid "PD-Colocated with Mooncake Multi-Instance" +msgstr "PD 共置与 Mooncake 多实例" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:3 +msgid "Getting Started" +msgstr "快速开始" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:5 +msgid "" +"vLLM-Ascend now supports PD-colocated deployment with Mooncake features. " +"This guide provides step-by-step instructions to test these features with" +" constrained resources." +msgstr "vLLM-Ascend 现已支持结合 Mooncake 功能的 PD 共置部署。本指南提供了在有限资源下测试这些功能的逐步说明。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:9 +msgid "" +"Using the Qwen2.5-72B-Instruct model as an example, this guide " +"demonstrates how to use vllm-ascend v0.11.0 (with vLLM v0.11.0) on two " +"Atlas 800T A2 nodes to deploy two vLLM instances. Each instance occupies " +"4 NPU cards and uses PD-colocated deployment." +msgstr "本指南以 Qwen2.5-72B-Instruct 模型为例,演示如何在两个 Atlas 800T A2 节点上使用 vllm-ascend v0.11.0(包含 vLLM v0.11.0)部署两个 vLLM 实例。每个实例占用 4 个 NPU 卡,并采用 PD 共置部署。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:14 +msgid "Verify Multi-Node Communication Environment" +msgstr "验证多节点通信环境" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:16 +msgid "Physical Layer Requirements" +msgstr "物理层要求" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:18 +msgid "" +"The two Atlas 800T A2 nodes must be physically interconnected via a RoCE " +"network. Without RoCE interconnection, cross-node KV Cache access " +"performance will be significantly degraded." +msgstr "两个 Atlas 800T A2 节点必须通过 RoCE 网络进行物理互连。若无 RoCE 互连,跨节点 KV Cache 访问性能将显著下降。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:21 +msgid "" +"All NPU cards must communicate properly. 
Intra-node communication uses " +"HCCS, while inter-node communication uses the RoCE network." +msgstr "所有 NPU 卡必须能够正常通信。节点内通信使用 HCCS,节点间通信使用 RoCE 网络。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:24 +msgid "Verification Process" +msgstr "验证流程" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:26 +msgid "" +"The following process serves as a reference example. Please modify " +"parameters such as IP addresses according to your actual environment." +msgstr "以下流程作为参考示例。请根据您的实际环境修改 IP 地址等参数。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:29 +msgid "Single Node Verification:" +msgstr "单节点验证:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:31 +msgid "" +"Execute the following commands sequentially. The results must all be " +"`success` and the status must be `UP`:" +msgstr "依次执行以下命令。结果必须全部为 `success` 且状态必须为 `UP`:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:47 +msgid "Check NPU HCCN Configuration:" +msgstr "检查 NPU HCCN 配置:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:49 +msgid "" +"Ensure that the hccn.conf file exists in the environment. If using " +"Docker, mount it into the container." +msgstr "确保环境中存在 hccn.conf 文件。如果使用 Docker,请将其挂载到容器中。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:56 +msgid "Get NPU IP Addresses:" +msgstr "获取 NPU IP 地址:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:62 +msgid "Cross-Node PING Test:" +msgstr "跨节点 PING 测试:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:70 +msgid "Check NPU TLS Configuration" +msgstr "检查 NPU TLS 配置" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:77 +msgid "Run with Docker" +msgstr "使用 Docker 运行" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:79 +msgid "Start a Docker container on each node." +msgstr "在每个节点上启动一个 Docker 容器。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:112 +msgid "(Optional) Install Mooncake" +msgstr "(可选)安装 Mooncake" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:114 +msgid "" +"Mooncake is pre-installed and functional in the v0.11.0 image. The " +"following installation steps are optional." +msgstr "Mooncake 在 v0.11.0 镜像中已预安装且功能正常。以下安装步骤是可选的。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:117 +msgid "" +"Mooncake is the serving platform for Kimi, a leading LLM service provided" +" by Moonshot AI. Installation and compilation guide: ." 
+msgstr "Mooncake 是 Kimi 的服务平台,Kimi 是由 Moonshot AI 提供的领先 LLM 服务。安装和编译指南:。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:121 +msgid "First, obtain the Mooncake project using the following command:" +msgstr "首先,使用以下命令获取 Mooncake 项目:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:129 +msgid "Install MPI:" +msgstr "安装 MPI:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:135 +msgid "Install the relevant dependencies (Go installation is not required):" +msgstr "安装相关依赖(无需安装 Go):" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:141 +msgid "Compile and install:" +msgstr "编译并安装:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:151 +msgid "After installation, verify that Mooncake is installed correctly:" +msgstr "安装后,验证 Mooncake 是否正确安装:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:160 +msgid "Start Mooncake Master Service" +msgstr "启动 Mooncake Master 服务" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:162 +msgid "" +"To start the Mooncake master service in one of the node containers, use " +"the following command:" +msgstr "要在其中一个节点容器中启动 Mooncake master 服务,请使用以下命令:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Parameter" +msgstr "参数" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Value" +msgstr "值" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Explanation" +msgstr "说明" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "port" +msgstr "端口" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "50088" +msgstr "50088" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Port for the master service" +msgstr "Master 服务端口" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "eviction_high_watermark_ratio" +msgstr "驱逐高水位线比例" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "0.95" +msgstr "0.95" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "High watermark ratio (95% threshold)" +msgstr "高水位线比例(95% 阈值)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "eviction_ratio" +msgstr "驱逐比例" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "0.05" +msgstr "0.05" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Percentage to evict when full (5%)" +msgstr "缓存满时驱逐的百分比(5%)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:179 +msgid "Create a Mooncake Configuration File Named mooncake.json" +msgstr "创建名为 mooncake.json 的 Mooncake 配置文件" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:181 +msgid "The template for the mooncake.json file is as follows:" +msgstr "mooncake.json 文件的模板如下:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "metadata_server" +msgstr "元数据服务器" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "P2PHANDSHAKE" +msgstr "P2PHANDSHAKE" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Point-to-point handshake mode" +msgstr "点对点握手模式" + +#: 
../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "protocol" +msgstr "协议" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "ascend" +msgstr "ascend" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Ascend proprietary protocol" +msgstr "Ascend 专有协议" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "master_server_address" +msgstr "主服务器地址" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "90.90.100.188:50088(for example)" +msgstr "90.90.100.188:50088(示例)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Master server address" +msgstr "主服务器地址" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "global_segment_size" +msgstr "全局段大小" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "107374182400" +msgstr "107374182400" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Size per segment (100 GB)" +msgstr "每个段的大小(100 GB)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:200 +msgid "vLLM Instance Deployment" +msgstr "vLLM 实例部署" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:202 +msgid "" +"Create containers on both Node 1 and Node 2, and launch the Qwen2.5-72B-" +"Instruct model service in each to test the reusability and performance of" +" cross-node, cross-instance KV Cache. Instance 1 utilizes NPU cards [0-3]" +" on the first Atlas 800T A2 server, while Instance 2 utilizes cards [0-3]" +" on the second server." +msgstr "在节点 1 和节点 2 上分别创建容器,并在每个容器中启动 Qwen2.5-72B-Instruct 模型服务,以测试跨节点、跨实例 KV Cache 的可重用性和性能。实例 1 使用第一个 Atlas 800T A2 服务器上的 NPU 卡 [0-3],而实例 2 使用第二个服务器上的卡 [0-3]。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:208 +msgid "Deploy Instance 1" +msgstr "部署实例 1" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:210 +msgid "" +"Replace file paths, host, and port parameters based on your actual " +"environment configuration." +msgstr "请根据您的实际环境配置替换文件路径、主机和端口参数。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:242 +msgid "Deploy Instance 2" +msgstr "部署实例 2" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:244 +msgid "" +"The deployment method for Instance 2 is identical to Instance 1. Simply " +"modify the `--host` and `--port` parameters according to your Instance 2 " +"configuration." 
+msgstr "实例 2 的部署方法与实例 1 相同。只需根据您的实例 2 配置修改 `--host` 和 `--port` 参数。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:248 +msgid "Configuration Parameters" +msgstr "配置参数" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "kv_connector" +msgstr "kv_connector" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "MooncakeConnectorStoreV1" +msgstr "MooncakeConnectorStoreV1" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Use StoreV1 version" +msgstr "使用 StoreV1 版本" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "kv_role" +msgstr "kv_role" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "kv_both" +msgstr "kv_both" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Enable both produce and consume" +msgstr "同时启用生产和消费" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "use_layerwise" +msgstr "use_layerwise" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "false" +msgstr "false" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Transfer entire cache (see note)" +msgstr "传输整个缓存(参见备注)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "mooncake_rpc_port" +msgstr "mooncake_rpc_port" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "0" +msgstr "0" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Automatic port assignment" +msgstr "自动端口分配" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "load_async" +msgstr "load_async" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "true" +msgstr "true" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Enable asynchronous loading" +msgstr "启用异步加载" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "register_buffer" +msgstr "register_buffer" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Required for PD-colocated mode" +msgstr "PD 共置模式必需" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:259 +msgid "**Note on use_layerwise:**" +msgstr "**关于 use_layerwise 的说明:**" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:261 +msgid "" +"`false`: Transfer entire KV Cache (suitable for cross-node with " +"sufficient bandwidth)" +msgstr "`false`: 传输整个KV缓存(适用于跨节点且带宽充足的情况)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:263 +msgid "" +"`true`: Layer-by-layer transfer (suitable for single-node memory " +"constraints)" +msgstr "`true`: 逐层传输(适用于单节点内存受限的情况)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:266 +msgid "Benchmark" +msgstr "性能基准测试" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:268 +msgid "" +"We recommend using the **AISBench** tool to assess performance. 
The test " +"uses **Dataset A**, consisting of fully random data, with the following " +"configuration:" +msgstr "我们推荐使用 **AISBench** 工具进行性能评估。测试使用 **数据集A**,该数据集由完全随机的数据组成,配置如下:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:272 +msgid "Input/output tokens: 1024/10" +msgstr "输入/输出令牌数:1024/10" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:273 +msgid "Total requests: 100" +msgstr "总请求数:100" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:274 +msgid "Concurrency: 25" +msgstr "并发数:25" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:276 +msgid "The test procedure consists of three steps:" +msgstr "测试流程包含三个步骤:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:278 +msgid "Step 1: Baseline (No Cache)" +msgstr "步骤 1:基准测试(无缓存)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:280 +msgid "" +"Send Dataset A to Instance 1 on Node 1 and record the Time to First Token" +" (TTFT) as **TTFT1**." +msgstr "将数据集A发送到节点1上的实例1,并记录首令牌时间(TTFT)为 **TTFT1**。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:283 +msgid "Preparation for Step 2" +msgstr "步骤 2 的准备工作" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:285 +msgid "" +"Before Step 2, send a fully random Dataset B to Instance 1. Due to the " +"unified HBM/DRAM KV Cache with LRU (Least Recently Used) eviction policy," +" Dataset B's cache evicts Dataset A's cache from HBM, leaving Dataset A's" +" cache only in Node 1's DRAM." +msgstr "在步骤2之前,向实例1发送一个完全随机的数据集B。由于采用了具有LRU(最近最少使用)淘汰策略的统一HBM/DRAM KV缓存,数据集B的缓存会将数据集A的缓存从HBM中淘汰,使得数据集A的缓存仅保留在节点1的DRAM中。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:290 +msgid "Step 2: Local DRAM Hit" +msgstr "步骤 2:本地DRAM命中" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:292 +msgid "" +"Send Dataset A to Instance 1 again to measure the performance when " +"hitting the KV Cache in local DRAM. Record the TTFT as **TTFT2**." +msgstr "再次将数据集A发送到实例1,以测量命中本地DRAM中KV缓存时的性能。记录TTFT为 **TTFT2**。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:295 +msgid "Step 3: Cross-Node DRAM Hit" +msgstr "步骤 3:跨节点DRAM命中" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:297 +msgid "" +"Send Dataset A to Instance 2. With the Mooncake KV Cache pool, this " +"results in a cross-node KV Cache hit from Node 1's DRAM. Record the TTFT " +"as **TTFT3**." 
+msgstr "将数据集A发送到实例2。借助Mooncake KV缓存池,这将导致一次来自节点1 DRAM的跨节点KV缓存命中。记录TTFT为 **TTFT3**。" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:301 +msgid "**Model Configuration**:" +msgstr "**模型配置**:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:329 +msgid "**Performance Benchmarking Commands**:" +msgstr "**性能基准测试命令**:" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md:337 +msgid "Test Results" +msgstr "测试结果" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Requests" +msgstr "请求数" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "Concur" +msgstr "并发数" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "TTFT1 (ms)" +msgstr "TTFT1 (毫秒)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "TTFT2 (ms)" +msgstr "TTFT2 (毫秒)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "TTFT3 (ms)" +msgstr "TTFT3 (毫秒)" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "100" +msgstr "100" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "25" +msgstr "25" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "2322" +msgstr "2322" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "739" +msgstr "739" + +#: ../../source/tutorials/features/pd_colocated_mooncake_multi_instance.md +msgid "948" +msgstr "948" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po new file mode 100644 index 00000000..bf6e1c0c --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po @@ -0,0 +1,471 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:1 +msgid "Prefill-Decode Disaggregation (Deepseek)" +msgstr "预填充-解码解耦部署 (Deepseek)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:3 +msgid "Getting Started" +msgstr "快速开始" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:5 +msgid "" +"vLLM-Ascend now supports prefill-decode (PD) disaggregation with EP " +"(Expert Parallel) options. This guide takes one-by-one steps to verify " +"these features with constrained resources." +msgstr "vLLM-Ascend 现已支持结合专家并行(EP)选项的预填充-解码(PD)解耦部署。本指南将逐步引导您在有限资源下验证这些功能。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:7 +msgid "" +"Take the Deepseek-r1-w8a8 model as an example, use 4 Atlas 800T A3 " +"servers to deploy the \"2P1D\" architecture. 
Assume the IP of the " +"prefiller server is 192.0.0.1 (prefill 1) and 192.0.0.2 (prefill 2), and " +"the decoder servers are 192.0.0.3 (decoder 1) and 192.0.0.4 (decoder 2). " +"On each server, use 8 NPUs 16 chips to deploy one service instance." +msgstr "以 Deepseek-r1-w8a8 模型为例,使用 4 台 Atlas 800T A3 服务器部署 \"2P1D\" 架构。假设预填充服务器 IP 为 192.0.0.1(预填充节点 1)和 192.0.0.2(预填充节点 2),解码服务器 IP 为 192.0.0.3(解码节点 1)和 192.0.0.4(解码节点 2)。每台服务器使用 8 个 NPU(16 个芯片)部署一个服务实例。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:9 +msgid "Verify Multi-Node Communication Environment" +msgstr "验证多节点通信环境" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:11 +msgid "Physical Layer Requirements" +msgstr "物理层要求" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:13 +msgid "" +"The physical machines must be located on the same WLAN, with network " +"connectivity." +msgstr "物理服务器必须位于同一局域网内,并具备网络连通性。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:14 +msgid "" +"All NPUs must be interconnected. Intra-node connectivity is via HCCS, and" +" inter-node connectivity is via RDMA." +msgstr "所有 NPU 必须能够互联。节点内通过 HCCS 连接,节点间通过 RDMA 连接。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:16 +msgid "Verification Process" +msgstr "验证流程" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:18 +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:27 +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:83 +msgid "" +"Execute the following commands on each node in sequence. The results must" +" all be `success` and the status must be `UP`:" +msgstr "依次在每个节点上执行以下命令。所有结果必须为 `success` 且状态必须为 `UP`:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md +msgid "A3" +msgstr "A3" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:25 +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:81 +msgid "Single Node Verification:" +msgstr "单节点验证:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:42 +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:98 +msgid "Check NPU HCCN Configuration:" +msgstr "检查 NPU HCCN 配置:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:44 +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:100 +msgid "" +"Ensure that the hccn.conf file exists in the environment. If using " +"Docker, mount it into the container." 
+msgstr "确保环境中存在 hccn.conf 文件。如果使用 Docker,请将其挂载到容器中。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:50 +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:106 +msgid "Get NPU IP Addresses" +msgstr "获取 NPU IP 地址" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:57 +msgid "Get superpodid and SDID" +msgstr "获取 superpodid 和 SDID" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:63 +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:112 +msgid "Cross-Node PING Test" +msgstr "跨节点 PING 测试" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:70 +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:119 +msgid "Check NPU TLS Configuration" +msgstr "检查 NPU TLS 配置" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md +msgid "A2" +msgstr "A2" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:128 +msgid "Run with Docker" +msgstr "使用 Docker 运行" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:130 +msgid "Start a Docker container on each node." +msgstr "在每个节点上启动一个 Docker 容器。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:174 +msgid "Install Mooncake" +msgstr "安装 Mooncake" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:176 +msgid "" +"Mooncake is the serving platform for Kimi, a leading LLM service provided" +" by Moonshot AI.Installation and Compilation Guide: First, we" +" need to obtain the Mooncake project. Refer to the following command:" +msgstr "Mooncake 是月之暗面(Moonshot AI)提供的领先 LLM 服务 Kimi 的推理平台。安装与编译指南: 首先,我们需要获取 Mooncake 项目。参考以下命令:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:183 +msgid "(Optional) Replace go install url if the network is poor" +msgstr "(可选)如果网络状况不佳,请替换 go install 的 URL" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:190 +msgid "Install mpi" +msgstr "安装 mpi" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:196 +msgid "Install the relevant dependencies. The installation of Go is not required." +msgstr "安装相关依赖。无需安装 Go。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:202 +msgid "Compile and install" +msgstr "编译并安装" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:212 +msgid "Set environment variables" +msgstr "设置环境变量" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:214 +msgid "**Note:**" +msgstr "**注意:**" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:216 +msgid "Adjust the Python path according to your specific Python installation" +msgstr "请根据您具体的 Python 安装路径进行调整" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:217 +msgid "" +"Ensure `/usr/local/lib` and `/usr/local/lib64` are in your " +"`LD_LIBRARY_PATH`" +msgstr "确保 `/usr/local/lib` 和 `/usr/local/lib64` 在您的 `LD_LIBRARY_PATH` 中" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:223 +msgid "Prefiller/Decoder Deployment" +msgstr "预填充器/解码器部署" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:225 +msgid "" +"We can run the following scripts to launch a server on the " +"prefiller/decoder node, respectively. 
Please note that each P/D node will" +" occupy ports ranging from kv_port to kv_port + num_chips to initialize " +"socket listeners. To avoid any issues, port conflicts should be " +"prevented. Additionally, ensure that each node's engine_id is uniquely " +"assigned to avoid conflicts." +msgstr "我们可以分别运行以下脚本来在预填充器/解码器节点上启动服务器。请注意,每个 P/D 节点将占用从 kv_port 到 kv_port + num_chips 的端口范围来初始化 socket 监听器。为避免问题,应防止端口冲突。此外,请确保每个节点的 engine_id 被唯一分配,以避免冲突。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:227 +msgid "kv_port Configuration Guide" +msgstr "kv_port 配置指南" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:229 +msgid "" +"On Ascend NPU, Mooncake uses AscendDirectTransport for RDMA data " +"transfer, which randomly allocates ports within range `[20000, 20000 + " +"npu_per_node × 1000)`. If `kv_port` overlaps with this range, " +"intermittent port conflicts may occur. To avoid this, configure `kv_port`" +" according to the table below:" +msgstr "在 Ascend NPU 上,Mooncake 使用 AscendDirectTransport 进行 RDMA 数据传输,它会在 `[20000, 20000 + npu_per_node × 1000)` 范围内随机分配端口。如果 `kv_port` 与此范围重叠,可能会发生间歇性端口冲突。为避免此问题,请根据下表配置 `kv_port`:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid "NPUs per Node" +msgstr "每节点 NPU 数量" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid "Reserved Port Range" +msgstr "保留端口范围" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid "Recommended kv_port" +msgstr "推荐 kv_port" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid "8" +msgstr "8" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid "20000 - 27999" +msgstr "20000 - 27999" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid ">= 28000" +msgstr ">= 28000" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid "16" +msgstr "16" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid "20000 - 35999" +msgstr "20000 - 35999" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 +msgid ">= 36000" +msgstr ">= 36000" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:237 +msgid "" +"If you occasionally see `zmq.error.ZMQError: Address already in use` " +"during startup, it may be caused by kv_port conflicting with randomly " +"allocated AscendDirectTransport ports. Increase your kv_port value to " +"avoid the reserved range." +msgstr "如果在启动时偶尔看到 `zmq.error.ZMQError: Address already in use`,可能是由于 kv_port 与随机分配的 AscendDirectTransport 端口冲突所致。请增加您的 kv_port 值以避开保留范围。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:240 +msgid "launch_online_dp.py" +msgstr "launch_online_dp.py" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:242 +msgid "" +"Use `launch_online_dp.py` to launch external dp vllm servers. 
" +"[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-" +"ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" +msgstr "使用 `launch_online_dp.py` 启动外部解耦 vllm 服务器。[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:245 +msgid "run_dp_template.sh" +msgstr "run_dp_template.sh" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:247 +msgid "" +"Modify `run_dp_template.sh` on each node. " +"[run\\_dp\\_template.sh](https://github.com/vllm-project/vllm-" +"ascend/blob/main/examples/external_online_dp/run_dp_template.sh)" +msgstr "在每个节点上修改 `run_dp_template.sh`。[run\\_dp\\_template.sh](https://github.com/vllm-project/vllm-ascend/blob/main/examples/external_online_dp/run_dp_template.sh)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:250 +msgid "Layerwise" +msgstr "分层模式" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md +msgid "Prefiller node 1" +msgstr "预填充节点 1" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md +msgid "Prefiller node 2" +msgstr "预填充节点 2" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md +msgid "Decoder node 1" +msgstr "解码节点 1" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md +msgid "Decoder node 2" +msgstr "解码节点 2" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:493 +msgid "Non-layerwise" +msgstr "非分层模式" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:735 +msgid "Start the service" +msgstr "启动服务" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:748 +msgid "Example Proxy for Deployment" +msgstr "部署示例代理" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:750 +msgid "" +"Run a proxy server on the same node where your prefiller service instance" +" is deployed. You can find the proxy implementation in the repository's " +"examples directory." 
+msgstr "在部署了预填充器服务实例的同一节点上运行一个代理服务器。您可以在仓库的 examples 目录中找到代理实现。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:752 +msgid "" +"We provide two different proxy implementations with distinct request " +"routing behaviors:" +msgstr "我们提供两种具有不同请求路由行为的代理实现:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:754 +msgid "" +"**`load_balance_proxy_layerwise_server_example.py`**: Requests are first " +"routed to the D nodes, which then forward to the P nodes as needed.This " +"proxy is designed for use with the " +"MooncakeLayerwiseConnector.[load\\_balance\\_proxy\\_layerwise\\_server\\_example.py](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py)" +msgstr "**`load_balance_proxy_layerwise_server_example.py`**:请求首先被路由到 D 节点,然后根据需要转发到 P 节点。此代理设计用于与 MooncakeLayerwiseConnector 配合使用。[load\\_balance\\_proxy\\_layerwise\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:756 +msgid "" +"**`load_balance_proxy_server_example.py`**: Requests are first routed to " +"the P nodes, which then forward to the D nodes for subsequent " +"processing.This proxy is designed for use with the " +"MooncakeConnector.[load\\_balance\\_proxy\\_server\\_example.py](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "**`load_balance_proxy_server_example.py`**:请求首先被路由到 P 节点,然后转发到 D 节点进行后续处理。此代理设计用于与 MooncakeConnector 配合使用。[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "Parameter" +msgstr "参数" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "meaning" +msgstr "含义" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "--port" +msgstr "--port" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "Proxy service Port" +msgstr "代理服务端口" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "--host" +msgstr "--host" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "Proxy service Host IP" +msgstr "代理服务主机 IP" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "--prefiller-hosts" +msgstr "--prefiller-hosts" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "Hosts of prefiller nodes" +msgstr "预填充节点主机列表" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "--prefiller-ports" +msgstr "--prefiller-ports" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "Ports of prefiller nodes" +msgstr "预填充节点的端口" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "--decoder-hosts" +msgstr "--decoder-hosts" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "Hosts of decoder nodes" +msgstr "解码器节点的主机地址" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid 
"--decoder-ports" +msgstr "--decoder-ports" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 +msgid "Ports of decoder nodes" +msgstr "解码器节点的端口" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:877 +msgid "" +"You can get the proxy program in the repository's examples, " +"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "" +"您可以在代码仓库的示例中找到代理程序," +"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:879 +msgid "Benchmark" +msgstr "基准测试" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:881 +msgid "" +"We recommend use aisbench tool to assess performance. " +"[aisbench](https://gitee.com/aisbench/benchmark) Execute the following " +"commands to install aisbench" +msgstr "" +"我们推荐使用 aisbench 工具进行性能评估。" +"[aisbench](https://gitee.com/aisbench/benchmark) 执行以下命令安装 aisbench" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:889 +msgid "" +"You need to cancel the http proxy before assessing performance, as " +"following" +msgstr "在评估性能前,您需要取消 HTTP 代理设置,如下所示" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:897 +msgid "You can place your datasets in the dir: `benchmark/ais_bench/datasets`" +msgstr "您可以将数据集放置在目录:`benchmark/ais_bench/datasets` 中" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:898 +msgid "" +"You can change the configuration in the dir " +":`benchmark/ais_bench/benchmark/configs/models/vllm_api` Take the " +"``vllm_api_stream_chat.py`` for example" +msgstr "" +"您可以在目录 `benchmark/ais_bench/benchmark/configs/models/vllm_api` 中修改配置。以 " +"`vllm_api_stream_chat.py` 为例" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:924 +msgid "" +"Take gsm8k dataset for example, execute the following commands to assess" +" performance." +msgstr "以 gsm8k 数据集为例,执行以下命令来评估性能。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:930 +msgid "" +"For more details for commands and parameters for aisbench, refer to " +"[aisbench](https://gitee.com/aisbench/benchmark)" +msgstr "有关 aisbench 命令和参数的更多详细信息,请参考 [aisbench](https://gitee.com/aisbench/benchmark)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:932 +msgid "FAQ" +msgstr "常见问题" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:934 +msgid "1. Prefiller nodes need to warmup" +msgstr "1. 预填充节点需要预热" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:936 +msgid "" +"Since the computation of some NPU operators requires several rounds of " +"warm-up to achieve best performance, we recommend preheating the service " +"with some requests before conducting performance tests to achieve the " +"best end-to-end throughput." +msgstr "" +"由于部分 NPU 算子的计算需要经过多轮预热才能达到最佳性能,我们建议在进行性能测试前,先用一些请求预热服务,以获得最佳的端到端吞吐量。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:938 +msgid "Verification" +msgstr "验证" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:940 +msgid "Check service health using the proxy server endpoint." 
+msgstr "使用代理服务器端点检查服务健康状况。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po new file mode 100644 index 00000000..0de9ebcc --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po @@ -0,0 +1,213 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:1 +msgid "Prefill-Decode Disaggregation (Qwen2.5-VL)" +msgstr "预填充-解码解耦架构 (Qwen2.5-VL)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:3 +msgid "Getting Start" +msgstr "开始使用" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:5 +msgid "" +"vLLM-Ascend now supports prefill-decode (PD) disaggregation. This guide " +"takes one-by-one steps to verify these features with constrained " +"resources." +msgstr "vLLM-Ascend 现已支持预填充-解码 (PD) 解耦架构。本指南将逐步引导您在有限资源下验证这些功能。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:7 +msgid "" +"Using the Qwen2.5-VL-7B-Instruct model as an example, use vllm-ascend " +"v0.11.0rc1 (with vLLM v0.11.0) on 1 Atlas 800T A2 server to deploy the " +"\"1P1D\" architecture. Assume the IP address is 192.0.0.1." +msgstr "以 Qwen2.5-VL-7B-Instruct 模型为例,在 1 台 Atlas 800T A2 服务器上使用 vllm-ascend v0.11.0rc1 (包含 vLLM v0.11.0) 部署 \"1P1D\" 架构。假设 IP 地址为 192.0.0.1。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:9 +msgid "Verify Communication Environment" +msgstr "验证通信环境" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:11 +msgid "Verification Process" +msgstr "验证流程" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:13 +msgid "Single Node Verification:" +msgstr "单节点验证:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:15 +msgid "" +"Execute the following commands in sequence. The results must all be " +"`success` and the status must be `UP`:" +msgstr "依次执行以下命令。结果必须均为 `success` 且状态必须为 `UP`:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:30 +msgid "Check NPU HCCN Configuration:" +msgstr "检查 NPU HCCN 配置:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:32 +msgid "" +"Ensure that the hccn.conf file exists in the environment. If using " +"Docker, mount it into the container." 
+msgstr "确保环境中存在 hccn.conf 文件。如果使用 Docker,请将其挂载到容器中。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:38 +msgid "Get NPU IP Addresses" +msgstr "获取 NPU IP 地址" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:44 +msgid "Cross-Node PING Test" +msgstr "跨节点 PING 测试" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:51 +msgid "Check NPU TLS Configuration" +msgstr "检查 NPU TLS 配置" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:58 +msgid "Run with Docker" +msgstr "使用 Docker 运行" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:60 +msgid "Start a Docker container." +msgstr "启动一个 Docker 容器。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:95 +msgid "Install Mooncake" +msgstr "安装 Mooncake" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:97 +msgid "" +"Mooncake is the serving platform for Kimi, a leading LLM service provided" +" by Moonshot AI. Installation and Compilation Guide: . First, " +"we need to obtain the Mooncake project. Refer to the following command:" +msgstr "Mooncake 是 Kimi 的服务平台,Kimi 是由 Moonshot AI 提供的领先 LLM 服务。安装与编译指南:。首先,我们需要获取 Mooncake 项目。参考以下命令:" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:104 +msgid "(Optional) Replace go install url if the network is poor." +msgstr "(可选)如果网络状况不佳,请替换 go install 的 URL。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:111 +msgid "Install mpi." +msgstr "安装 mpi。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:117 +msgid "Install the relevant dependencies. The installation of Go is not required." +msgstr "安装相关依赖。无需安装 Go。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:123 +msgid "Compile and install." +msgstr "编译并安装。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:133 +msgid "Set environment variables." +msgstr "设置环境变量。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:135 +msgid "**Note:**" +msgstr "**注意:**" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:137 +msgid "Adjust the Python path according to your specific Python installation" +msgstr "根据您具体的 Python 安装情况调整 Python 路径" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:138 +msgid "" +"Ensure `/usr/local/lib` and `/usr/local/lib64` are in your " +"`LD_LIBRARY_PATH`" +msgstr "确保 `/usr/local/lib` 和 `/usr/local/lib64` 在您的 `LD_LIBRARY_PATH` 中" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:144 +msgid "Prefiller/Decoder Deployment" +msgstr "预填充器/解码器部署" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:146 +msgid "" +"We can run the following scripts to launch a server on the " +"prefiller/decoder NPU, respectively." +msgstr "我们可以分别运行以下脚本来在预填充器/解码器 NPU 上启动服务器。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md +msgid "Prefiller" +msgstr "预填充器" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md +msgid "Decoder" +msgstr "解码器" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:236 +msgid "" +"If you want to run \"2P1D\", please set ASCEND_RT_VISIBLE_DEVICES and " +"port to different values for each P process." 
+msgstr "如果您想运行 \"2P1D\",请为每个 P 进程将 ASCEND_RT_VISIBLE_DEVICES 和 port 设置为不同的值。" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:238 +msgid "Example Proxy for Deployment" +msgstr "部署示例代理" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:240 +msgid "" +"Run a proxy server on the same node with the prefiller service instance. " +"You can get the proxy program in the repository's examples: " +"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "在与预填充器服务实例相同的节点上运行一个代理服务器。您可以在仓库的示例中找到该代理程序:[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:193 +msgid "Parameter" +msgstr "参数" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:193 +msgid "Meaning" +msgstr "含义" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:193 +msgid "--port" +msgstr "--port" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:193 +msgid "Port of proxy" +msgstr "代理端口" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:193 +msgid "--prefiller-port" +msgstr "--prefiller-port" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:193 +msgid "All ports of prefill" +msgstr "所有预填充端口" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:193 +msgid "--decoder-ports" +msgstr "--decoder-ports" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:193 +msgid "All ports of decoder" +msgstr "所有解码器端口" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:258 +msgid "Verification" +msgstr "验证" + +#: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:260 +msgid "Check service health using the proxy server endpoint." +msgstr "使用代理服务器端点检查服务健康状态。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/ray.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/ray.po new file mode 100644 index 00000000..a2fa2510 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/ray.po @@ -0,0 +1,219 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/features/ray.md:1 +msgid "Ray Distributed (Qwen3-235B-A22B)" +msgstr "Ray 分布式部署 (Qwen3-235B-A22B)" + +#: ../../source/tutorials/features/ray.md:3 +msgid "" +"Multi-node inference is suitable for scenarios where the model cannot be " +"deployed on a single machine. In such cases, the model can be distributed" +" using tensor parallelism or pipeline parallelism. 
The specific " +"parallelism strategies will be covered in the following sections. To " +"successfully deploy multi-node inference, the following three steps need " +"to be completed:" +msgstr "" +"多节点推理适用于模型无法在单机上部署的场景。在这种情况下,可以使用张量并行或流水线并行来分布模型。具体的并行策略将在后续章节中介绍。要成功部署多节点推理,需要完成以下三个步骤:" + +#: ../../source/tutorials/features/ray.md:5 +msgid "**Verify Multi-Node Communication Environment**" +msgstr "**验证多节点通信环境**" + +#: ../../source/tutorials/features/ray.md:6 +msgid "**Set Up and Start the Ray Cluster**" +msgstr "**设置并启动 Ray 集群**" + +#: ../../source/tutorials/features/ray.md:7 +msgid "**Start the Online Inference Service on Multi-node**" +msgstr "**在多节点上启动在线推理服务**" + +#: ../../source/tutorials/features/ray.md:9 +msgid "Verify Multi-Node Communication Environment" +msgstr "验证多节点通信环境" + +#: ../../source/tutorials/features/ray.md:11 +msgid "Physical Layer Requirements" +msgstr "物理层要求" + +#: ../../source/tutorials/features/ray.md:13 +msgid "" +"The physical machines must be located on the same LAN, with network " +"connectivity." +msgstr "物理机必须位于同一局域网内,并具备网络连通性。" + +#: ../../source/tutorials/features/ray.md:14 +msgid "" +"All NPUs are connected with optical modules, and the connection status " +"must be normal." +msgstr "所有 NPU 均通过光模块连接,且连接状态必须正常。" + +#: ../../source/tutorials/features/ray.md:16 +msgid "Verification Process" +msgstr "验证流程" + +#: ../../source/tutorials/features/ray.md:18 +msgid "" +"Execute the following commands on each node in sequence. The results must" +" all be `success` and the status must be `UP`:" +msgstr "依次在每个节点上执行以下命令。结果必须均为 `success`,状态必须为 `UP`:" + +#: ../../source/tutorials/features/ray.md:35 +msgid "NPU Interconnect Verification" +msgstr "NPU 互联验证" + +#: ../../source/tutorials/features/ray.md:37 +msgid "1. Get NPU IP Addresses" +msgstr "1. 获取 NPU IP 地址" + +#: ../../source/tutorials/features/ray.md:43 +msgid "2. Cross-Node PING Test" +msgstr "2. 跨节点 PING 测试" + +#: ../../source/tutorials/features/ray.md:50 +msgid "Set Up and Start the Ray Cluster" +msgstr "设置并启动 Ray 集群" + +#: ../../source/tutorials/features/ray.md:52 +msgid "Setting Up the Basic Container" +msgstr "设置基础容器" + +#: ../../source/tutorials/features/ray.md:54 +msgid "" +"To ensure a consistent execution environment across all nodes, including " +"the model path and Python environment, it is advised to use Docker " +"images." +msgstr "为确保所有节点(包括模型路径和 Python 环境)的执行环境一致,建议使用 Docker 镜像。" + +#: ../../source/tutorials/features/ray.md:56 +msgid "" +"For setting up a multi-node inference cluster with Ray, **containerized " +"deployment** is the preferred approach. Containers should be started on " +"both the primary and secondary nodes, with the `--net=host` option to " +"enable proper network connectivity." +msgstr "对于使用 Ray 设置多节点推理集群,**容器化部署**是首选方法。应在主节点和从节点上都启动容器,并使用 `--net=host` 选项以确保正确的网络连接。" + +#: ../../source/tutorials/features/ray.md:58 +msgid "" +"Below is the example container setup command, which should be executed on" +" **all nodes** :" +msgstr "以下是容器设置命令示例,应在 **所有节点** 上执行:" + +#: ../../source/tutorials/features/ray.md:94 +msgid "Start Ray Cluster" +msgstr "启动 Ray 集群" + +#: ../../source/tutorials/features/ray.md:96 +msgid "" +"After setting up the containers and installing vllm-ascend on each node, " +"follow the steps below to start the Ray cluster and execute inference " +"tasks." +msgstr "在每个节点上设置好容器并安装 vllm-ascend 后,按照以下步骤启动 Ray 集群并执行推理任务。" + +#: ../../source/tutorials/features/ray.md:98 +msgid "" +"Choose one machine as the primary node and the others as secondary nodes." 
+" Before proceeding, use `ip addr` to check your `nic_name` (network " +"interface name)." +msgstr "选择一台机器作为主节点,其他作为从节点。在继续之前,使用 `ip addr` 检查您的 `nic_name`(网络接口名称)。" + +#: ../../source/tutorials/features/ray.md:100 +msgid "" +"Set the `ASCEND_RT_VISIBLE_DEVICES` environment variable to specify the " +"NPU devices to use. For Ray versions above 2.1, also set the " +"`RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES` variable to avoid " +"device recognition issues." +msgstr "设置 `ASCEND_RT_VISIBLE_DEVICES` 环境变量以指定要使用的 NPU 设备。对于 Ray 2.1 以上版本,还需设置 `RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES` 变量以避免设备识别问题。" + +#: ../../source/tutorials/features/ray.md:102 +msgid "Below are the commands for the primary and secondary nodes:" +msgstr "以下是主节点和从节点的命令:" + +#: ../../source/tutorials/features/ray.md:104 +msgid "**Primary node**:" +msgstr "**主节点**:" + +#: ../../source/tutorials/features/ray.md:107 +#: ../../source/tutorials/features/ray.md:124 +msgid "" +"When starting a Ray cluster for multi-node inference, the environment " +"variables on each node must be set **before** starting the Ray cluster " +"for them to take effect. Updating the environment variables requires " +"restarting the Ray cluster." +msgstr "在为多节点推理启动 Ray 集群时,必须在启动 Ray 集群 **之前** 设置每个节点上的环境变量,它们才会生效。更新环境变量需要重启 Ray 集群。" + +#: ../../source/tutorials/features/ray.md:121 +msgid "**Secondary node**:" +msgstr "**从节点**:" + +#: ../../source/tutorials/features/ray.md:137 +msgid "" +"Once the cluster is started on multiple nodes, execute `ray status` and " +"`ray list nodes` to verify the Ray cluster's status. You should see the " +"correct number of nodes and NPUs listed." +msgstr "在多个节点上启动集群后,执行 `ray status` 和 `ray list nodes` 以验证 Ray 集群的状态。您应该看到列出的正确节点数和 NPU 数。" + +#: ../../source/tutorials/features/ray.md:139 +msgid "" +"After Ray is successfully started, the following content will appear: A " +"local Ray instance has started successfully. Dashboard URL: The access " +"address for the Ray Dashboard (default: ); Node " +"status (CPU/memory resources, number of healthy nodes); Cluster " +"connection address (used for adding multiple nodes)." +msgstr "Ray 成功启动后,将出现以下内容:本地 Ray 实例已成功启动。仪表板 URL:Ray 仪表板的访问地址(默认:);节点状态(CPU/内存资源、健康节点数);集群连接地址(用于添加多个节点)。" + +#: ../../source/tutorials/features/ray.md:143 +msgid "Start the Online Inference Service on Multi-node scenario" +msgstr "在多节点场景下启动在线推理服务" + +#: ../../source/tutorials/features/ray.md:145 +msgid "" +"In the container, you can use vLLM as if all NPUs were on a single node. " +"vLLM will utilize NPU resources across all nodes in the Ray cluster." +msgstr "在容器中,您可以像所有 NPU 都在单个节点上一样使用 vLLM。vLLM 将利用 Ray 集群中所有节点的 NPU 资源。" + +#: ../../source/tutorials/features/ray.md:147 +msgid "**You only need to run the vllm command on one node.**" +msgstr "**您只需在一个节点上运行 vllm 命令。**" + +#: ../../source/tutorials/features/ray.md:149 +msgid "" +"To set up parallelism, the common practice is to set the `tensor-" +"parallel-size` to the number of NPUs per node, and the `pipeline-" +"parallel-size` to the number of nodes." 
+msgstr "要设置并行,通常的做法是将 `tensor-parallel-size` 设置为每个节点的 NPU 数量,将 `pipeline-parallel-size` 设置为节点数量。" + +#: ../../source/tutorials/features/ray.md:151 +msgid "" +"For example, with 16 NPUs across 2 nodes (8 NPUs per node), set the " +"tensor parallel size to 8 and the pipeline parallel size to 2:" +msgstr "例如,对于分布在 2 个节点上的 16 个 NPU(每个节点 8 个 NPU),将张量并行大小设置为 8,流水线并行大小设置为 2:" + +#: ../../source/tutorials/features/ray.md:167 +msgid "" +"Alternatively, if you want to use only tensor parallelism, set the tensor" +" parallel size to the total number of NPUs in the cluster. For example, " +"with 16 NPUs across 2 nodes, set the tensor parallel size to 16:" +msgstr "或者,如果您只想使用张量并行,请将张量并行大小设置为集群中 NPU 的总数。例如,对于分布在 2 个节点上的 16 个 NPU,将张量并行大小设置为 16:" + +#: ../../source/tutorials/features/ray.md:182 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/suffix_speculative_decoding.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/suffix_speculative_decoding.po new file mode 100644 index 00000000..b992aa44 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/suffix_speculative_decoding.po @@ -0,0 +1,854 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:1 +msgid "Suffix Speculative Decoding" +msgstr "后缀推测解码" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:3 +msgid "**Introduction**" +msgstr "**简介**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:5 +msgid "" +"Suffix Decoding is an optimization technique for speculative decoding " +"based on pattern matching. It simultaneously retrieves repetitive " +"sequences from both the prompt and the generated content, using frequency" +" statistics to predict the most likely token continuations. Unlike " +"traditional speculative decoding methods, Suffix Decoding runs entirely " +"on the CPU, eliminating the need for additional GPU resources or draft " +"models, which results in superior acceleration for repetitive tasks such " +"as AI agents and code generation." +msgstr "" +"后缀解码是一种基于模式匹配的推测解码优化技术。它同时从提示词和已生成内容中检索重复序列,利用频率统计来预测最可能的后续标记。与传统的推测解码方法不同,后缀解码完全在CPU上运行,无需额外的GPU资源或草稿模型,从而在AI智能体和代码生成等重复性任务上实现卓越的加速效果。" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:7 +msgid "" +"This document provides step-by-step guidance on how to deploy and " +"benchmark the Suffix Decoding speculative inference technology supported " +"by `vllm-ascend` on Atlas A2 hardware. The setup utilizes a single Atlas " +"800T A2 node with a 4-card deployment of the Qwen3-32B model instance. 
" +"Benchmarking is conducted using authentic open-source datasets covering " +"the following categories:" +msgstr "" +"本文档提供了在Atlas A2硬件上部署和基准测试`vllm-ascend`支持的后缀解码推测推理技术的分步指南。该设置使用单个Atlas 800T A2节点,部署了4卡的Qwen3-32B模型实例。基准测试使用涵盖以下类别的真实开源数据集进行:" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**Dataset Category**" +msgstr "**数据集类别**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**Dataset Name**" +msgstr "**数据集名称**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Code Generation" +msgstr "代码生成" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "HumanEval" +msgstr "HumanEval" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Common Sense Reasoning" +msgstr "常识推理" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "ARC" +msgstr "ARC" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Mathematical Reasoning" +msgstr "数学推理" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "gsm8k" +msgstr "gsm8k" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Natural Language Understanding" +msgstr "自然语言理解" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "SuperGLUE_BoolQ" +msgstr "SuperGLUE_BoolQ" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Comprehensive Examination" +msgstr "综合评测" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "AGIEval" +msgstr "AGIEval" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Multi-turn Dialogue" +msgstr "多轮对话" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "ShareGPT" +msgstr "ShareGPT" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:18 +#, python-format +msgid "" +"The benchmarking tool used in this tutorial is AISBench, which supports " +"performance testing for all the datasets listed above. The final section " +"of this tutorial presents a performance comparison between enabling and " +"disabling Suffix Decoding under the condition of satisfying an SLO TPOT <" +" 50ms across different datasets and concurrency levels. Validations " +"demonstrate that the Qwen3-32B model achieves a throughput improvement of" +" approximately 20% to 80% on various real-world datasets when Suffix " +"Decoding is enabled." +msgstr "" +"本教程使用的基准测试工具是AISBench,它支持对上述所有数据集进行性能测试。本教程最后一节展示了在不同数据集和并发级别下,满足SLO TPOT < 50ms条件时,启用与禁用后缀解码的性能对比。验证表明,启用后缀解码后,Qwen3-32B模型在各种真实数据集上实现了约20%至80%的吞吐量提升。" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:20 +msgid "**Download vllm-ascend Image**" +msgstr "**下载 vllm-ascend 镜像**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:22 +msgid "" +"This tutorial uses the official image, version v0.13.0rc1. 
Use the " +"following command to download:" +msgstr "本教程使用官方镜像,版本为v0.13.0rc1。使用以下命令下载:" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:28 +msgid "**Run with Docker**" +msgstr "**使用 Docker 运行**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:30 +msgid "Container startup command:" +msgstr "容器启动命令:" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:64 +msgid "**Install arctic-inference**" +msgstr "**安装 arctic-inference**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:66 +msgid "" +"Before enabling Suffix Decoding speculative inference on Ascend, the " +"Arctic Inference plugin must be installed. Arctic Inference is an open-" +"source plugin launched by Snowflake specifically to optimize LLM " +"inference speed. For detailed technical principles, please refer to the " +"following article: [Fastest Speculative Decoding in vLLM with Arctic " +"Inference and Arctic Training](https://www.snowflake.com/en/engineering-" +"blog/fast-speculative-decoding-vllm-arctic/). Install it within the " +"container using the following command:" +msgstr "" +"在Ascend上启用后缀解码推测推理之前,必须安装Arctic Inference插件。Arctic Inference是Snowflake推出的一个开源插件,专门用于优化LLM推理速度。详细技术原理请参考以下文章:[Fastest Speculative Decoding in vLLM with Arctic Inference and Arctic Training](https://www.snowflake.com/en/engineering-blog/fast-speculative-decoding-vllm-arctic/)。在容器内使用以下命令安装:" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:72 +msgid "**vLLM Instance Deployment**" +msgstr "**vLLM 实例部署**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:74 +msgid "" +"Use the following command to start the container service instance. " +"Speculative inference is enabled via the `--speculative-config` " +"parameter, where `method` is set to `suffix`. For this test, " +"`num_speculative_tokens` is uniformly set to `3`." +msgstr "" +"使用以下命令启动容器服务实例。通过`--speculative-config`参数启用推测推理,其中`method`设置为`suffix`。本次测试中,`num_speculative_tokens`统一设置为`3`。" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:99 +msgid "**AISbench Benchmark Testing**" +msgstr "**AISbench 基准测试**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:101 +msgid "" +"Performance for all open-source datasets is tested using AISbench. For " +"specific instructions, refer to [Using AISBench for performance " +"evaluation](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/evaluation/using_ais_bench.html" +"#execute-performance-evaluation)." +msgstr "" +"所有开源数据集的性能均使用AISbench进行测试。具体操作说明请参考[使用AISBench进行性能评估](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/evaluation/using_ais_bench.html#execute-performance-evaluation)。" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:103 +msgid "**Model Configuration**:" +msgstr "**模型配置**:" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:132 +msgid "**Performance Benchmarking Commands**:" +msgstr "**性能基准测试命令**:" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:141 +msgid "**Test Results**" +msgstr "**测试结果**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:143 +msgid "" +"Below are the detailed test results of the six open-source datasets in " +"this evaluation. Compared to the baseline performance, the improvement in" +" TPOT and throughput performance at different concurrency levels after " +"enabling Suffix Decoding varies across datasets. 
The extent of " +"improvement after enabling Suffix Decoding differs among the datasets. " +"Below is a summary of the results:" +msgstr "" +"以下是本次评估中六个开源数据集的详细测试结果。与基线性能相比,启用后缀解码后,不同并发级别下的TPOT和吞吐量性能提升程度因数据集而异。启用后缀解码后的提升幅度在不同数据集间存在差异。以下是结果总结:" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**Typical Representative**" +msgstr "**典型代表**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**Throughput Improvement (BS=1-10)**" +msgstr "**吞吐量提升 (BS=1-10)**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**SLO TPOT**" +msgstr "**SLO TPOT**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**High Gain**" +msgstr "**高增益**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "AGIEval, GSM8K" +msgstr "AGIEval, GSM8K" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**> 50%**" +msgstr "**> 50%**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "< 50ms" +msgstr "< 50ms" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**Medium-Low Gain**" +msgstr "**中低增益**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "ARC, ShareGPT" +msgstr "ARC, ShareGPT" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**20% ~ 30%**" +msgstr "**20% ~ 30%**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md:150 +msgid "Below is the raw detailed test results:" +msgstr "以下是原始详细测试结果:" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Concurrency" +msgstr "并发数" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Avg Input" +msgstr "平均输入长度" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Avg Output" +msgstr "平均输出长度" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Requests" +msgstr "请求数" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Base TPOT(ms)" +msgstr "基线 TPOT(ms)" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Base Throughput(TPS)" +msgstr "基线吞吐量(TPS)" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Suffix TPOT(ms)" +msgstr "后缀解码 TPOT(ms)" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Suffix Throughput(TPS)" +msgstr "后缀解码吞吐量(TPS)" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "Accept Rate" +msgstr "接受率" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "TPOT Gain" +msgstr "TPOT 增益" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "TPS Gain" +msgstr "TPS 增益" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**Humaneval**" +msgstr "**Humaneval**" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "1" +msgstr "1" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "150" +msgstr "150" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "2700" +msgstr "2700" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "100" +msgstr "100" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "55.1" +msgstr "55.1" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "18.1" +msgstr "18.1" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "37.9" +msgstr 
"37.9" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "26.3" +msgstr "26.3" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "27.0%" +msgstr "27.0%" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "45.2%" +msgstr "45.2%" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "45.1%" +msgstr "45.1%" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "15" +msgstr "15" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "61.6" +msgstr "61.6" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "233.8" +msgstr "233.8" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "45.8" +msgstr "45.8" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "318.2" +msgstr "318.2" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "34.6%" +msgstr "34.6%" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "36.1%" +msgstr "36.1%" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "26" +msgstr "26" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "64.7" +msgstr "64.7" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "403.8" +msgstr "403.8" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "50.9" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "519.2" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "27.2%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "28.6%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**ARC**" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "76" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "960" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "52.8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "18.9" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "39.5" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "25.4" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "23.9%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "33.7%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "59.1" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "125.4" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "47.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "163.1" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "25.7%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "30.0%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "59.8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "245.8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "48.9" +msgstr 
"" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "311.7" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "22.3%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "26.8%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**GSM8K**" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "67" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "1570" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "55.5" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "18.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "35.7" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "28.5" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "31.1%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "55.6%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "58.4%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "17" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "61.5" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "279.8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "45.4" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "403.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "35.6%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "44.0%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "63.9" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "396.4" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "50.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "527.6" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "27.8%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "33.1%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**ShareGPT**" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "666" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "231" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "327" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "54.1" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "18.3" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "39.2" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "24.1" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "37.9%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "31.5%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "58.8" +msgstr "" + +#: 
../../source/tutorials/features/suffix_speculative_decoding.md +msgid "125.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "46.2" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "153.2" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "27.1%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "22.5%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "14" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "61.8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "227.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "49.9" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "273.9" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "23.8%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "20.7%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**SuperGLUE_BoolQ**" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "207" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "314" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "18.4" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "36.1" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "26.8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "33.4%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "49.8%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "45.6%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "16" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "60.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "229.7" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "43.5" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "303.9" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "38.0%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "32.3%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "32" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "62.7" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "47.8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "507.5" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "31.3%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "28.0%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "**AGIEval**" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "735" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "1880" +msgstr "" + +#: 
../../source/tutorials/features/suffix_speculative_decoding.md +msgid "53.1" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "18.7" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "31.8" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "34.1" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "50.3%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "66.8%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "81.9%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "24" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "64.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "381.2" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "43.3" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "629.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "47.8%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "65.0%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "34" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "70.0" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "494.6" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "50.2" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "768.4" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "39.4%" +msgstr "" + +#: ../../source/tutorials/features/suffix_speculative_decoding.md +msgid "55.3%" +msgstr "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/310p.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/310p.po new file mode 100644 index 00000000..77b37950 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/310p.po @@ -0,0 +1,142 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/hardwares/310p.md:1 +msgid "Atlas 300I" +msgstr "Atlas 300I" + +#: ../../source/tutorials/hardwares/310p.md:4 +msgid "" +"This Atlas 300I series is currently experimental. In future versions, " +"there may be behavioral changes related to model coverage and performance" +" improvement." +msgstr "" +"Atlas 300I 系列目前处于实验阶段。在未来的版本中,可能会发生与模型覆盖范围和性能改进相关的行为变更。" + +#: ../../source/tutorials/hardwares/310p.md:5 +msgid "" +"Currently, the Atlas 300I series only supports eager mode and the float16" +" data type." 
+msgstr "目前,Atlas 300I 系列仅支持 eager 模式和 float16 数据类型。" + +#: ../../source/tutorials/hardwares/310p.md:8 +msgid "Run vLLM on Atlas 300I Series" +msgstr "在 Atlas 300I 系列上运行 vLLM" + +#: ../../source/tutorials/hardwares/310p.md:10 +msgid "Run docker container:" +msgstr "运行 docker 容器:" + +#: ../../source/tutorials/hardwares/310p.md:40 +msgid "Set up environment variables:" +msgstr "设置环境变量:" + +#: ../../source/tutorials/hardwares/310p.md:50 +msgid "Online Inference on NPU" +msgstr "在 NPU 上进行在线推理" + +#: ../../source/tutorials/hardwares/310p.md:53 +msgid "" +"For Atlas 300I (310P), do not rely on `max-model-len` auto detection " +"(omit `--max-model-len`), because it may cause OOM." +msgstr "对于 Atlas 300I (310P),不要依赖 `max-model-len` 的自动检测(省略 `--max-model-len`),因为这可能导致 OOM。" + +#: ../../source/tutorials/hardwares/310p.md:56 +msgid "Reason (current 310P attention path):" +msgstr "原因(当前 310P 注意力路径):" + +#: ../../source/tutorials/hardwares/310p.md:57 +msgid "" +"`AscendAttentionMetadataBuilder310` passes `model_config.max_model_len` " +"to `AttentionMaskBuilder310`." +msgstr "`AscendAttentionMetadataBuilder310` 将 `model_config.max_model_len` 传递给 `AttentionMaskBuilder310`。" + +#: ../../source/tutorials/hardwares/310p.md:59 +msgid "" +"`AttentionMaskBuilder310` builds a full causal mask with shape " +"`[max_model_len, max_model_len]` in float16, then casts it to FRACTAL_NZ." +msgstr "`AttentionMaskBuilder310` 构建一个形状为 `[max_model_len, max_model_len]` 的完整因果掩码(float16 类型),然后将其转换为 FRACTAL_NZ 格式。" + +#: ../../source/tutorials/hardwares/310p.md:61 +msgid "" +"In 310P `attention_v1` prefill/chunked-prefill (`_npu_flash_attention` / " +"`_npu_paged_attention_splitfuse`), this explicit mask tensor is consumed " +"directly, and there is no compressed-mask path." +msgstr "在 310P 的 `attention_v1` prefill/chunked-prefill (`_npu_flash_attention` / `_npu_paged_attention_splitfuse`) 中,这个显式的掩码张量被直接使用,不存在压缩掩码路径。" + +#: ../../source/tutorials/hardwares/310p.md:66 +msgid "" +"So if auto resolves to a large context length, the mask allocation " +"(`O(max_model_len^2)`) can exceed NPU memory and trigger OOM. Always set " +"a conservative explicit value, for example `--max-model-len 4096`." +msgstr "因此,如果自动解析到一个很大的上下文长度,掩码分配(`O(max_model_len^2)`)可能会超出 NPU 内存并触发 OOM。请始终设置一个保守的显式值,例如 `--max-model-len 4096`。" + +#: ../../source/tutorials/hardwares/310p.md:71 +msgid "" +"Run the following script to start the vLLM server on NPU (Qwen3-0.6B:1 " +"card, Qwen2.5-7B-Instruct:2 cards, Pangu-Pro-MoE-72B: 8 cards):" +msgstr "运行以下脚本在 NPU 上启动 vLLM 服务器(Qwen3-0.6B:1卡,Qwen2.5-7B-Instruct:2卡,Pangu-Pro-MoE-72B:8卡):" + +#: ../../source/tutorials/hardwares/310p.md +msgid "Qwen3-0.6B" +msgstr "Qwen3-0.6B" + +#: ../../source/tutorials/hardwares/310p.md:81 +#: ../../source/tutorials/hardwares/310p.md:111 +#: ../../source/tutorials/hardwares/310p.md:141 +msgid "Run the following command to start the vLLM server:" +msgstr "运行以下命令启动 vLLM 服务器:" + +#: ../../source/tutorials/hardwares/310p.md:92 +#: ../../source/tutorials/hardwares/310p.md:122 +#: ../../source/tutorials/hardwares/310p.md:152 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "服务器启动后,您可以使用输入提示词查询模型。" + +#: ../../source/tutorials/hardwares/310p.md +msgid "Qwen2.5-7B-Instruct" +msgstr "Qwen2.5-7B-Instruct" + +#: ../../source/tutorials/hardwares/310p.md +msgid "Qwen2.5-VL-3B-Instruct" +msgstr "Qwen2.5-VL-3B-Instruct" + +#: ../../source/tutorials/hardwares/310p.md:168 +msgid "If you run this script successfully, you can see the results." 
+msgstr "如果此脚本运行成功,您将看到结果。" + +#: ../../source/tutorials/hardwares/310p.md:170 +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/tutorials/hardwares/310p.md:172 +msgid "" +"Run the following script (`example.py`) to execute offline inference on " +"NPU:" +msgstr "运行以下脚本 (`example.py`) 在 NPU 上执行离线推理:" + +#: ../../source/tutorials/hardwares/310p.md:307 +msgid "Run script:" +msgstr "运行脚本:" + +#: ../../source/tutorials/hardwares/310p.md:313 +msgid "If you run this script successfully, you can see the info shown below:" +msgstr "如果此脚本运行成功,您将看到如下信息:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/index.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/index.po new file mode 100644 index 00000000..991ceddf --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/index.po @@ -0,0 +1,29 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/hardwares/index.md:1 +#: ../../source/tutorials/hardwares/index.md:5 +msgid "Hardware Tutorials" +msgstr "硬件教程" + +#: ../../source/tutorials/hardwares/index.md:3 +msgid "This section provides tutorials on different hardware of vLLM Ascend." +msgstr "本节提供关于 vLLM Ascend 不同硬件的教程。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-R1.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-R1.po new file mode 100644 index 00000000..fe6ccf52 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-R1.po @@ -0,0 +1,364 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/DeepSeek-R1.md:1 +msgid "DeepSeek-R1" +msgstr "DeepSeek-R1" + +#: ../../source/tutorials/models/DeepSeek-R1.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/DeepSeek-R1.md:5 +msgid "" +"DeepSeek-R1 is a high-performance Mixture-of-Experts (MoE) large language" +" model developed by DeepSeek Company. It excels in complex logical " +"reasoning, mathematical problem-solving, and code generation. By " +"dynamically activating its expert networks, it delivers exceptional " +"performance while maintaining computational efficiency. Building upon R1," +" DeepSeek-R1-W8A8 is a fully quantized version of the model. 
It employs " +"8-bit integer (INT8) quantization for both weights and activations, which" +" significantly reduces the model's memory footprint and computational " +"requirements, enabling more efficient deployment and application in " +"resource-constrained environments. This article takes the " +"`DeepSeek-R1-W8A8` version as an example to introduce the deployment of " +"the R1 series models." +msgstr "" +"DeepSeek-R1 是由深度求索公司开发的高性能混合专家(MoE)大语言模型。它在复杂逻辑推理、数学问题求解和代码生成方面表现出色。通过动态激活其专家网络,它在保持计算效率的同时提供了卓越的性能。基于 R1,DeepSeek-R1-W8A8 是该模型的完全量化版本。它对权重和激活均采用 8 位整数(INT8)量化,这显著减少了模型的内存占用和计算需求,使其能够在资源受限的环境中更高效地部署和应用。本文以 `DeepSeek-R1-W8A8` 版本为例,介绍 R1 系列模型的部署。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:8 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/DeepSeek-R1.md:10 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:12 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置方法。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:14 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/DeepSeek-R1.md:16 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/DeepSeek-R1.md:18 +msgid "" +"`DeepSeek-R1-W8A8`(Quantized version): require 1 Atlas 800 A3 (64G × 16) " +"nodes or 2 Atlas 800 A2 (64G × 8) nodes. [Download model " +"weight](https://www.modelscope.cn/models/vllm-ascend/DeepSeek-R1-W8A8)" +msgstr "" +"`DeepSeek-R1-W8A8`(量化版本):需要 1 个 Atlas 800 A3(64G × 16)节点或 2 个 Atlas 800 A2(64G × 8)节点。[下载模型权重](https://www.modelscope.cn/models/vllm-ascend/DeepSeek-R1-W8A8)" + +#: ../../source/tutorials/models/DeepSeek-R1.md:20 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes." +msgstr "建议将模型权重下载到多节点的共享目录中。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:22 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/DeepSeek-R1.md:24 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:26 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/DeepSeek-R1.md:28 +msgid "You can use our official docker image to run `DeepSeek-R1-W8A8` directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 `DeepSeek-R1-W8A8`。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:30 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择一个镜像并在您的节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:69 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." 
+msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:71 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/DeepSeek-R1.md:73 +msgid "Service-oriented Deployment" +msgstr "面向服务的部署" + +#: ../../source/tutorials/models/DeepSeek-R1.md:75 +msgid "" +"`DeepSeek-R1-W8A8`: require 1 Atlas 800 A3 (64G × 16) nodes or 2 Atlas " +"800 A2 (64G × 8)." +msgstr "`DeepSeek-R1-W8A8`:需要 1 个 Atlas 800 A3(64G × 16)节点或 2 个 Atlas 800 A2(64G × 8)节点。" + +#: ../../source/tutorials/models/DeepSeek-R1.md +msgid "DeepSeek-R1-W8A8 A3 series" +msgstr "DeepSeek-R1-W8A8 A3 系列" + +#: ../../source/tutorials/models/DeepSeek-R1.md:120 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数解释如下:" + +#: ../../source/tutorials/models/DeepSeek-R1.md:123 +msgid "" +"Setting the environment variable `VLLM_ASCEND_BALANCE_SCHEDULING=1` " +"enables balance scheduling. This may help increase output throughput and " +"reduce TPOT in v1 scheduler. However, TTFT may degrade in some scenarios." +" Furthermore, enabling this feature is not recommended in scenarios where" +" PD is separated." +msgstr "" +"设置环境变量 `VLLM_ASCEND_BALANCE_SCHEDULING=1` 可启用均衡调度。这可能有助于在 v1 调度器中提高输出吞吐量并降低 TPOT。然而,在某些场景下 TTFT 可能会下降。此外,在 PD 分离的场景中不建议启用此功能。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:124 +msgid "" +"For single-node deployment, we recommend using `dp4tp4` instead of " +"`dp2tp8`." +msgstr "对于单节点部署,我们建议使用 `dp4tp4` 而不是 `dp2tp8`。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:125 +msgid "" +"`--max-model-len` specifies the maximum context length - that is, the sum" +" of input and output tokens for a single request. For performance testing" +" with an input length of 3.5K and output length of 1.5K, a value of " +"`16384` is sufficient, however, for precision testing, please set it to " +"at least `35000`." +msgstr "" +"`--max-model-len` 指定最大上下文长度——即单个请求的输入和输出令牌总数。对于输入长度为 3.5K 和输出长度为 1.5K 的性能测试,`16384` 的值已足够,但对于精度测试,请将其设置为至少 `35000`。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:126 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. " +"To enable it, remove this option." +msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,请移除此选项。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:127 +msgid "" +"If you use the w4a8 weight, more memory will be allocated to kvcache, and" +" you can try to increase system throughput to achieve greater throughput." +msgstr "如果您使用 w4a8 权重,将有更多内存分配给 kvcache,您可以尝试增加系统吞吐量以实现更大的吞吐量。" + +#: ../../source/tutorials/models/DeepSeek-R1.md +msgid "DeepSeek-R1-W8A8 A2 series" +msgstr "DeepSeek-R1-W8A8 A2 系列" + +#: ../../source/tutorials/models/DeepSeek-R1.md:132 +msgid "Run the following scripts on two nodes respectively." +msgstr "分别在两个节点上运行以下脚本。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:134 +msgid "**Node 0**" +msgstr "**节点 0**" + +#: ../../source/tutorials/models/DeepSeek-R1.md:179 +msgid "**Node 1**" +msgstr "**节点 1**" + +#: ../../source/tutorials/models/DeepSeek-R1.md:230 +msgid "Prefill-Decode Disaggregation" +msgstr "Prefill-Decode 解耦" + +#: ../../source/tutorials/models/DeepSeek-R1.md:232 +msgid "" +"We recommend using DeepSeek-V3.1 for deployment: " +"[DeepSeek-V3.1](./DeepSeek-V3.1.md)." +msgstr "我们推荐使用 DeepSeek-V3.1 进行部署:[DeepSeek-V3.1](./DeepSeek-V3.1.md)。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:234 +msgid "This solution has been tested and demonstrates excellent performance." 
+msgstr "此解决方案已经过测试,并展现出优异的性能。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:236 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/DeepSeek-R1.md:238 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "一旦您的服务器启动,您就可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/DeepSeek-R1.md:251 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/DeepSeek-R1.md:253 +msgid "Here are two accuracy evaluation methods." +msgstr "这里有两种精度评估方法。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:255 +#: ../../source/tutorials/models/DeepSeek-R1.md:286 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/DeepSeek-R1.md:257 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:259 +msgid "" +"After execution, you can get the result, here is the result of " +"`DeepSeek-R1-W8A8` in `vllm-ascend:0.11.0rc2` for reference only." +msgstr "执行后,您可以获得结果,以下是 `DeepSeek-R1-W8A8` 在 `vllm-ascend:0.11.0rc2` 中的结果,仅供参考。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "aime2024dataset" +msgstr "aime2024dataset" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "gen" +msgstr "gen" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "80.00" +msgstr "80.00" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "gpqadataset" +msgstr "gpqadataset" + +#: ../../source/tutorials/models/DeepSeek-R1.md:130 +msgid "72.22" +msgstr "72.22" + +#: ../../source/tutorials/models/DeepSeek-R1.md:266 +msgid "Using Language Model Evaluation Harness" +msgstr "使用 Language Model Evaluation Harness" + +#: ../../source/tutorials/models/DeepSeek-R1.md:268 +msgid "" +"As an example, take the `gsm8k` dataset as a test dataset, and run " +"accuracy evaluation of `DeepSeek-R1-W8A8` in online mode." +msgstr "以 `gsm8k` 数据集作为测试数据集为例,在在线模式下运行 `DeepSeek-R1-W8A8` 的精度评估。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:270 +msgid "" +"Refer to [Using " +"lm_eval](../../developer_guide/evaluation/using_lm_eval.md) for `lm_eval`" +" installation." +msgstr "`lm_eval` 的安装请参考[使用 lm_eval](../../developer_guide/evaluation/using_lm_eval.md)。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:272 +msgid "Run `lm_eval` to execute the accuracy evaluation." +msgstr "运行 `lm_eval` 以执行精度评估。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:282 +msgid "After execution, you can get the result." 
+msgstr "执行后,您可以获得结果。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:284 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/DeepSeek-R1.md:288 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参考[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:290 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/DeepSeek-R1.md:292 +msgid "Run performance evaluation of `DeepSeek-R1-W8A8` as an example." +msgstr "以运行 `DeepSeek-R1-W8A8` 的性能评估为例。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:294 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "更多详情请参考 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:296 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/DeepSeek-R1.md:298 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:299 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:300 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:302 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。运行代码如下。" + +#: ../../source/tutorials/models/DeepSeek-R1.md:309 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您就可以获得性能评估结果。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.1.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.1.po new file mode 100644 index 00000000..3764a453 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.1.po @@ -0,0 +1,608 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:1 +msgid "DeepSeek-V3/3.1" +msgstr "DeepSeek-V3/3.1" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:5 +msgid "" +"DeepSeek-V3.1 is a hybrid model that supports both thinking mode and non-" +"thinking mode. 
Compared to the previous version, this upgrade brings " +"improvements in multiple aspects:" +msgstr "" +"DeepSeek-V3.1 是一个支持思考模式和非思考模式的混合模型。与前一版本相比,此" +"次升级在多个方面带来了改进:" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:7 +msgid "" +"Hybrid thinking mode: One model supports both thinking mode and non-" +"thinking mode by changing the chat template." +msgstr "" +"混合思考模式:一个模型通过更改聊天模板,同时支持思考模式和非思考模式。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:9 +msgid "" +"Smarter tool calling: Through post-training optimization, the model's " +"performance in tool usage and agent tasks has significantly improved." +msgstr "" +"更智能的工具调用:通过后训练优化,模型在工具使用和智能体任务方面的性能显著提" +"升。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:11 +msgid "" +"Higher thinking efficiency: DeepSeek-V3.1-Think achieves comparable " +"answer quality to DeepSeek-R1-0528, while responding more quickly." +msgstr "" +"更高的思考效率:DeepSeek-V3.1-Think 实现了与 DeepSeek-R1-0528 相当的答案质" +"量,同时响应速度更快。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:13 +msgid "The `DeepSeek-V3.1` model is first supported in `vllm-ascend:v0.9.1rc3`." +msgstr "`DeepSeek-V3.1` 模型首次在 `vllm-ascend:v0.9.1rc3` 中得到支持。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:15 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node and multi-node deployment, accuracy and " +"performance evaluation." +msgstr "" +"本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单节点" +"和多节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:17 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:19 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "" +"请参考 [支持的特性](../../user_guide/support_matrix/supported_models.md) " +"以获取模型支持的特性矩阵。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:21 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "" +"请参考 [特性指南](../../user_guide/feature_guide/index.md) 以获取特性的配" +"置。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:23 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:25 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:27 +msgid "" +"`DeepSeek-V3.1`(BF16 version): [Download model " +"weight](https://www.modelscope.cn/models/deepseek-ai/DeepSeek-V3.1)." +msgstr "" +"`DeepSeek-V3.1`(BF16 版本):[下载模型权重](https://www.modelscope.cn/" +"models/deepseek-ai/DeepSeek-V3.1)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:28 +msgid "" +"`DeepSeek-V3.1-w8a8-mtp-QuaRot`(Quantized version with mix mtp): " +"[Download model weight](https://www.modelscope.cn/models/Eco-" +"Tech/DeepSeek-V3.1-w8a8-mtp-QuaRot)." +msgstr "" +"`DeepSeek-V3.1-w8a8-mtp-QuaRot`(混合 MTP 量化版本):[下载模型权重]" +"(https://www.modelscope.cn/models/Eco-Tech/DeepSeek-V3.1-w8a8-mtp-" +"QuaRot)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:29 +msgid "" +"`DeepSeek-V3.1-Terminus-w4a8-mtp-QuaRot`(Quantized version with mix mtp):" +" [Download model weight](https://www.modelscope.cn/models/Eco-" +"Tech/DeepSeek-V3.1-Terminus-w4a8-mtp-QuaRot)." 
+msgstr "" +"`DeepSeek-V3.1-Terminus-w4a8-mtp-QuaRot`(混合 MTP 量化版本):[下载模型权" +"重](https://www.modelscope.cn/models/Eco-Tech/DeepSeek-V3.1-Terminus-w4a8-" +"mtp-QuaRot)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:30 +#, python-format +msgid "" +"`Quantization method`: " +"[msmodelslim](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/example/DeepSeek/README.md#deepseek-v31-w8a8-%E6%B7%B7%E5%90%88%E9%87%8F%E5%8C%96-mtp-%E9%87%8F%E5%8C%96)." +" You can use this method to quantize the model." +msgstr "" +"`量化方法`:" +"[msmodelslim](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/example/DeepSeek/README.md#deepseek-v31-w8a8-%E6%B7%B7%E5%90%88%E9%87%8F%E5%8C%96-mtp-%E9%87%8F%E5%8C%96)。" +" 您可以使用此方法对模型进行量化。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:32 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`." +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:34 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:36 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "" +"如果您想部署多节点环境,需要根据 [验证多节点通信环境](../../installation." +"md#verify-multi-node-communication) 验证多节点通信。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:38 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:40 +msgid "You can use our official docker image to run `DeepSeek-V3.1` directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 `DeepSeek-V3.1`。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:42 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "" +"根据您的机器类型选择镜像并在节点上启动 docker 镜像,请参考 [使用 docker]" +"(../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:80 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." +msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:82 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:84 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:86 +msgid "" +"Quantized model `DeepSeek-V3.1-w8a8-mtp-QuaRot` can be deployed on 1 " +"Atlas 800 A3 (64G × 16)." +msgstr "" +"量化模型 `DeepSeek-V3.1-w8a8-mtp-QuaRot` 可以部署在 1 台 Atlas 800 A3 " +"(64G × 16)上。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:88 +msgid "Run the following script to execute online inference." +msgstr "运行以下脚本以执行在线推理。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:131 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数说明如下:" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:134 +msgid "" +"Setting the environment variable `VLLM_ASCEND_BALANCE_SCHEDULING=1` " +"enables balance scheduling. This may help increase output throughput and " +"reduce TPOT in v1 scheduler. However, TTFT may degrade in some scenarios." +" Furthermore, enabling this feature is not recommended in scenarios where" +" PD is separated." 
+msgstr "" +"设置环境变量 `VLLM_ASCEND_BALANCE_SCHEDULING=1` 启用均衡调度。这可能有助于" +"在 v1 调度器中提高输出吞吐量并降低 TPOT。然而,在某些场景下 TTFT 可能会下" +"降。此外,在 PD 分离的场景中不建议启用此功能。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:135 +msgid "" +"For single-node deployment, we recommend using `dp4tp4` instead of " +"`dp2tp8`." +msgstr "对于单节点部署,我们建议使用 `dp4tp4` 而不是 `dp2tp8`。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:136 +msgid "" +"`--max-model-len` specifies the maximum context length - that is, the sum" +" of input and output tokens for a single request. For performance testing" +" with an input length of 3.5K and output length of 1.5K, a value of " +"`16384` is sufficient, however, for precision testing, please set it at " +"least `35000`." +msgstr "" +"`--max-model-len` 指定最大上下文长度——即单个请求的输入和输出令牌之和。对于输" +"入长度为 3.5K 和输出长度为 1.5K 的性能测试,`16384` 的值就足够了,但是,对于" +"精度测试,请至少将其设置为 `35000`。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:137 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. " +"To enable it, remove this option." +msgstr "" +"`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,请移除此选项。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:138 +msgid "" +"If you use the w4a8 weight, more memory will be allocated to kvcache, and" +" you can try to increase system throughput to achieve greater throughput." +msgstr "" +"如果使用 w4a8 权重,将分配更多内存给 kvcache,您可以尝试增加系统吞吐量以实现" +"更大的吞吐量。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:140 +msgid "Multi-node Deployment" +msgstr "多节点部署" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:142 +msgid "" +"`DeepSeek-V3.1-w8a8-mtp-QuaRot`: require at least 2 Atlas 800 A2 (64G × " +"8)." +msgstr "" +"`DeepSeek-V3.1-w8a8-mtp-QuaRot`:需要至少 2 台 Atlas 800 A2(64G × 8)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:144 +msgid "Run the following scripts on two nodes respectively." +msgstr "分别在两个节点上运行以下脚本。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:146 +msgid "**Node 0**" +msgstr "**节点 0**" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:198 +msgid "**Node 1**" +msgstr "**节点 1**" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:252 +msgid "Prefill-Decode Disaggregation" +msgstr "Prefill-Decode 解耦" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:254 +msgid "" +"We recommend using Mooncake for deployment: " +"[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)." +msgstr "" +"我们建议使用 Mooncake 进行部署:[Mooncake](../features/" +"pd_disaggregation_mooncake_multi_node.md)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:256 +msgid "" +"Take Atlas 800 A3 (64G × 16) for example, we recommend to deploy 2P1D (4 " +"nodes) rather than 1P1D (2 nodes), because there is no enough NPU memory " +"to serve high concurrency in 1P1D case." +msgstr "" +"以 Atlas 800 A3(64G × 16)为例,我们建议部署 2P1D(4 个节点)而不是 1P1D" +"(2 个节点),因为在 1P1D 情况下没有足够的 NPU 内存来服务高并发。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:258 +msgid "" +"`DeepSeek-V3.1-w8a8-mtp-QuaRot 2P1D Layerwise` require 4 Atlas 800 A3 " +"(64G × 16)." +msgstr "" +"`DeepSeek-V3.1-w8a8-mtp-QuaRot 2P1D Layerwise` 需要 4 台 Atlas 800 A3 " +"(64G × 16)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:260 +msgid "" +"To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need " +"to deploy a `launch_dp_program.py` script and a `run_dp_template.sh` " +"script on each node and deploy a `proxy.sh` script on prefill master node" +" to forward requests." 
+msgstr "" +"要运行 vllm-ascend `Prefill-Decode 解耦`服务,您需要在每个节点上部署一个 " +"`launch_dp_program.py` 脚本和一个 `run_dp_template.sh` 脚本,并在 prefill " +"主节点上部署一个 `proxy.sh` 脚本来转发请求。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:262 +msgid "" +"`launch_online_dp.py` to launch external dp vllm servers. " +"[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-" +"ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" +msgstr "" +"`launch_online_dp.py` 用于启动外部 dp vllm 服务器。[launch\\_online\\_dp." +"py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/" +"external_online_dp/launch_online_dp.py)" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:265 +msgid "Prefill Node 0 `run_dp_template.sh` script" +msgstr "Prefill 节点 0 `run_dp_template.sh` 脚本" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:342 +msgid "Prefill Node 1 `run_dp_template.sh` script" +msgstr "Prefill 节点 1 `run_dp_template.sh` 脚本" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:419 +msgid "Decode Node 0 `run_dp_template.sh` script" +msgstr "Decode 节点 0 `run_dp_template.sh` 脚本" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:495 +msgid "Decode Node 1 `run_dp_template.sh` script" +msgstr "Decode 节点 1 `run_dp_template.sh` 脚本" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:571 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数说明如下:" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:574 +msgid "" +"`VLLM_ASCEND_ENABLE_FLASHCOMM1=1`: enables the communication optimization" +" function on the prefill nodes." +msgstr "`VLLM_ASCEND_ENABLE_FLASHCOMM1=1`:在 prefill 节点上启用通信优化功能。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:575 +msgid "" +"`VLLM_ASCEND_ENABLE_MLAPO=1`: enables the fusion operator, which can " +"significantly improve performance but consumes more NPU memory. In the " +"Prefill-Decode (PD) separation scenario, enable MLAPO only on decode " +"nodes." +msgstr "" +"`VLLM_ASCEND_ENABLE_MLAPO=1`:启用融合算子,这可以显著提高性能但会消耗更多 " +"NPU 内存。在 Prefill-Decode (PD) 分离场景中,仅在 decode 节点上启用 MLAPO。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:576 +msgid "" +"`--async-scheduling`: enables the asynchronous scheduling function. When " +"Multi-Token Prediction (MTP) is enabled, asynchronous scheduling of " +"operator delivery can be implemented to overlap the operator delivery " +"latency." +msgstr "" +"`--async-scheduling`:启用异步调度功能。当启用多令牌预测 (MTP) 时,可以实现算" +"子交付的异步调度,以重叠算子交付延迟。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:577 +msgid "" +"`cudagraph_capture_sizes`: The recommended value is `n x (mtp + 1)`. And " +"the min is `n = 1` and the max is `n = max-num-seqs`. For other values, " +"it is recommended to set them to the number of frequently occurring " +"requests on the Decode (D) node." +msgstr "" +"`cudagraph_capture_sizes`:推荐值为 `n x (mtp + 1)`。最小值为 `n = 1`,最大" +"值为 `n = max-num-seqs`。对于其他值,建议将其设置为 Decode (D) 节点上频繁出" +"现的请求数量。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:578 +msgid "" +"`recompute_scheduler_enable: true`: enables the recomputation scheduler. " +"When the Key-Value Cache (KV Cache) of the decode node is insufficient, " +"requests will be sent to the prefill node to recompute the KV Cache. In " +"the PD separation scenario, it is recommended to enable this " +"configuration on both prefill and decode nodes simultaneously." 
+msgstr "" +"`recompute_scheduler_enable: true`:启用重计算调度器。当 decode 节点的键值缓" +"存 (KV Cache) 不足时,请求将被发送到 prefill 节点以重新计算 KV Cache。在 PD " +"分离场景中,建议同时在 prefill 和 decode 节点上启用此配置。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:579 +msgid "" +"`multistream_overlap_shared_expert: true`: When the Tensor Parallelism " +"(TP) size is 1 or `enable_shared_expert_dp: true`, an additional stream " +"is enabled to overlap the computation process of shared experts for " +"improved efficiency." +msgstr "" +"`multistream_overlap_shared_expert: true`:当张量并行 (TP) 大小为 1 或 " +"`enable_shared_expert_dp: true` 时,启用额外的流来重叠共享专家的计算过程,以" +"提高效率。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:580 +msgid "" +"`lmhead_tensor_parallel_size: 16`: When the Tensor Parallelism (TP) size " +"of the decode node is 1, this parameter allows the TP size of the LMHead " +"embedding layer to be greater than 1, which is used to reduce the " +"computational load of each card on the LMHead embedding layer." +msgstr "" +"`lmhead_tensor_parallel_size: 16`:当 decode 节点的张量并行 (TP) 大小为 1 " +"时,此参数允许 LMHead 嵌入层的 TP 大小大于 1,用于减少每张卡在 LMHead 嵌入层" +"上的计算负载。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:582 +msgid "run server for each node:" +msgstr "为每个节点运行服务器:" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:595 +msgid "Run the `proxy.sh` script on the prefill master node" +msgstr "在 prefill 主节点上运行 `proxy.sh` 脚本" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:597 +msgid "" +"Run a proxy server on the same node with the prefiller service instance. " +"You can get the proxy program in the repository's examples: " +"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "在与预填充服务实例相同的节点上运行一个代理服务器。您可以在仓库的示例中找到代理程序:[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:653 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:655 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:668 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:670 +msgid "Here are two accuracy evaluation methods." +msgstr "以下是两种精度评估方法。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:672 +#: ../../source/tutorials/models/DeepSeek-V3.1.md:689 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:674 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:676 +msgid "" +"After execution, you can get the result, here is the result of " +"`DeepSeek-V3.1-w8a8-mtp-QuaRot` in `vllm-ascend:0.11.0rc1` for reference " +"only." 
+msgstr "执行后,您可以获得结果。以下是 `vllm-ascend:0.11.0rc1` 中 `DeepSeek-V3.1-w8a8-mtp-QuaRot` 的结果,仅供参考。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "note" +msgstr "备注" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "ceval" +msgstr "ceval" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "90.94" +msgstr "90.94" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "1 Atlas 800 A3 (64G × 16)" +msgstr "1 Atlas 800 A3 (64G × 16)" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "gsm8k" +msgstr "gsm8k" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:44 +msgid "96.28" +msgstr "96.28" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:683 +msgid "Using Language Model Evaluation Harness" +msgstr "使用 Language Model Evaluation Harness" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:685 +msgid "Not test yet." +msgstr "尚未测试。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:687 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:691 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参考[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:693 +msgid "The performance result is:" +msgstr "性能结果如下:" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:695 +msgid "**Hardware**: A3-752T, 4 node" +msgstr "**硬件**:A3-752T,4 节点" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:697 +msgid "**Deployment**: 2P1D, Prefill node: DP2+TP8, Decode Node: DP32+TP1" +msgstr "**部署方式**:2P1D,预填充节点:DP2+TP8,解码节点:DP32+TP1" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:699 +msgid "**Input/Output**: 3.5k/1.5k" +msgstr "**输入/输出**:3.5k/1.5k" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:701 +msgid "" +"**Performance**: TTFT = 6.16s, TPOT = 48.82ms, Average performance of " +"each card is 478 TPS (Token Per Second)." +msgstr "**性能**:TTFT = 6.16s,TPOT = 48.82ms,单卡平均性能为 478 TPS(每秒令牌数)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:703 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:705 +msgid "" +"Run performance evaluation of `DeepSeek-V3.1-w8a8-mtp-QuaRot` as an " +"example." +msgstr "以运行 `DeepSeek-V3.1-w8a8-mtp-QuaRot` 的性能评估为例。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:707 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." 
+msgstr "更多详情请参考 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:709 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:711 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:712 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:713 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:715 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/DeepSeek-V3.1.md:721 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您将获得性能评估结果。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.2.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.2.po new file mode 100644 index 00000000..c47a813b --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.2.po @@ -0,0 +1,396 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:1 +msgid "DeepSeek-V3.2" +msgstr "DeepSeek-V3.2" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:5 +msgid "" +"DeepSeek-V3.2 is a sparse attention model. The main architecture is " +"similar to DeepSeek-V3.1, but with a sparse attention mechanism, which is" +" designed to explore and validate optimizations for training and " +"inference efficiency in long-context scenarios." +msgstr "" +"DeepSeek-V3.2 是一个稀疏注意力模型。其主要架构与 DeepSeek-V3.1 类似,但引入了稀疏注意力机制,旨在探索和验证长上下文场景下训练和推理效率的优化方案。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node and multi-node deployment, accuracy and " +"performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单节点与多节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:9 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:11 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的特性](../../user_guide/support_matrix/supported_models.md)以获取模型支持的特性矩阵。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:13 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." 
+msgstr "请参考[特性指南](../../user_guide/feature_guide/index.md)以获取特性的配置方法。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:15 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:17 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:19 +msgid "" +"`DeepSeek-V3.2-Exp-W8A8`(Quantized version): require 1 Atlas 800 A3 (64G " +"× 16) node or 2 Atlas 800 A2 (64G × 8) nodes. [Download model " +"weight](https://www.modelscope.cn/models/vllm-ascend/DeepSeek-V3.2-Exp-" +"W8A8)" +msgstr "" +"`DeepSeek-V3.2-Exp-W8A8`(量化版本):需要 1 个 Atlas 800 A3(64G × 16)节点或 2 个 Atlas 800 A2(64G × 8)节点。[下载模型权重](https://www.modelscope.cn/models/vllm-ascend/DeepSeek-V3.2-Exp-W8A8)" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:20 +msgid "" +"`DeepSeek-V3.2-w8a8`(Quantized version): require 1 Atlas 800 A3 (64G × " +"16) node or 2 Atlas 800 A2 (64G × 8) nodes. [Download model " +"weight](https://www.modelscope.cn/models/vllm-ascend/DeepSeek-V3.2-W8A8/)" +msgstr "" +"`DeepSeek-V3.2-w8a8`(量化版本):需要 1 个 Atlas 800 A3(64G × 16)节点或 2 个 Atlas 800 A2(64G × 8)节点。[下载模型权重](https://www.modelscope.cn/models/vllm-ascend/DeepSeek-V3.2-W8A8/)" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:22 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`." +msgstr "建议将模型权重下载到多节点的共享目录中,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:24 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:26 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:28 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:30 +msgid "You can use our official docker image to run `DeepSeek-V3.2` directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 `DeepSeek-V3.2`。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md +msgid "A3 series" +msgstr "A3 系列" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:39 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:82 +msgid "Start the docker image on your each node." +msgstr "在您的每个节点上启动 docker 镜像。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md +msgid "A2 series" +msgstr "A2 系列" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:115 +msgid "" +"In addition, if you don't want to use the docker image as above, you can " +"also build all from source:" +msgstr "此外,如果您不想使用上述 docker 镜像,也可以从源码构建所有内容:" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:117 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源码安装 `vllm-ascend`,请参考[安装指南](../../installation.md)。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:119 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." +msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:121 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:124 +msgid "" +"In this tutorial, we suppose you downloaded the model weight to " +"`/root/.cache/`. Feel free to change it to your own path." 
+msgstr "在本教程中,我们假设您已将模型权重下载到 `/root/.cache/`。您可以随意更改为自己的路径。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:127 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:129 +msgid "" +"Quantized model `DeepSeek-V3.2-w8a8` can be deployed on 1 Atlas 800 A3 " +"(64G × 16)." +msgstr "量化模型 `DeepSeek-V3.2-w8a8` 可以部署在 1 个 Atlas 800 A3(64G × 16)节点上。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:131 +msgid "Run the following script to execute online inference." +msgstr "运行以下脚本以执行在线推理。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:164 +msgid "" +"In PD-disaggregated deployments, `layer_sharding` is supported only on " +"prefill/P nodes with `kv_role=\"kv_producer\"`. Do not enable it on " +"decode/D nodes or `kv_role=\"kv_both\"` nodes." +msgstr "在 PD 解耦部署中,`layer_sharding` 仅支持在具有 `kv_role=\"kv_producer\"` 的 prefill/P 节点上启用。不要在 decode/D 节点或 `kv_role=\"kv_both\"` 节点上启用它。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:166 +msgid "Multi-node Deployment" +msgstr "多节点部署" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:168 +msgid "`DeepSeek-V3.2-w8a8`: require at least 2 Atlas 800 A2 (64G × 8)." +msgstr "`DeepSeek-V3.2-w8a8`:需要至少 2 个 Atlas 800 A2(64G × 8)节点。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:170 +msgid "Run the following scripts on two nodes respectively." +msgstr "分别在两个节点上运行以下脚本。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:179 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:283 +msgid "**Node0**" +msgstr "**节点0**" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:228 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:337 +msgid "**Node1**" +msgstr "**节点1**" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:395 +msgid "Prefill-Decode Disaggregation" +msgstr "Prefill-Decode 解耦" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:397 +msgid "" +"We'd like to show the deployment guide of `DeepSeek-V3.2` on multi-node " +"environment with 1P1D for better performance." +msgstr "我们将展示 `DeepSeek-V3.2` 在多节点环境下采用 1P1D 部署的指南,以获得更好的性能。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:399 +msgid "Before you start, please" +msgstr "在开始之前,请" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:401 +msgid "prepare the script `launch_online_dp.py` on each node:" +msgstr "在每个节点上准备脚本 `launch_online_dp.py`:" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:504 +msgid "prepare the script `run_dp_template.sh` on each node." +msgstr "在每个节点上准备脚本 `run_dp_template.sh`。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:506 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:809 +msgid "Prefill node 0" +msgstr "Prefill 节点 0" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:580 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:816 +msgid "Prefill node 1" +msgstr "Prefill 节点 1" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:653 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:823 +msgid "Decode node 0" +msgstr "Decode 节点 0" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:730 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:830 +msgid "Decode node 1" +msgstr "Decode 节点 1" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:806 +msgid "" +"Once the preparation is done, you can start the server with the following" +" command on each node: Refer to [Distributed DP Server With Large-Scale " +"Expert " +"Parallelism](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/large_scale_ep.html)" +" to get the detailed boot method." 
+msgstr "准备工作完成后,您可以在每个节点上使用以下命令启动服务器:请参考[分布式 DP 服务器与大规模专家并行](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/large_scale_ep.html)以获取详细的启动方法。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:837 +msgid "Request Forwarding" +msgstr "请求转发" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:839 +msgid "" +"To set up request forwarding, run the following script on any machine. " +"You can get the proxy program in the repository's examples: " +"[load_balance_proxy_layerwise_server_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py)" +msgstr "要设置请求转发,请在任何机器上运行以下脚本。您可以在仓库的示例中找到代理程序:[load_balance_proxy_layerwise_server_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py)" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:868 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:870 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:883 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:885 +msgid "Here are two accuracy evaluation methods." +msgstr "这里有两种精度评估方法。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:887 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:913 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:889 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:891 +#: ../../source/tutorials/models/DeepSeek-V3.2.md:909 +msgid "After execution, you can get the result." +msgstr "执行后,您可以获得结果。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:893 +msgid "Using Language Model Evaluation Harness" +msgstr "使用 Language Model Evaluation Harness" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:895 +msgid "" +"As an example, take the `gsm8k` dataset as a test dataset, and run " +"accuracy evaluation of `DeepSeek-V3.2-W8A8` in online mode." +msgstr "以 `gsm8k` 数据集作为测试数据集为例,运行 `DeepSeek-V3.2-W8A8` 的在线模式精度评估。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:897 +msgid "" +"Refer to [Using " +"lm_eval](../../developer_guide/evaluation/using_lm_eval.md) for `lm_eval`" +" installation." +msgstr "`lm_eval` 的安装请参考[使用 lm_eval](../../developer_guide/evaluation/using_lm_eval.md)。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:899 +msgid "Run `lm_eval` to execute the accuracy evaluation." +msgstr "运行 `lm_eval` 以执行精度评估。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:911 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:915 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." 
+msgstr "详情请参考[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:917 +msgid "The performance result is:" +msgstr "性能结果如下:" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:919 +msgid "**Hardware**: A3-752T, 4 node" +msgstr "**硬件**:A3-752T,4 节点" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:921 +msgid "**Deployment**: 1P1D, Prefill node: DP2+TP16, Decode Node: DP8+TP4" +msgstr "**部署**:1P1D,Prefill 节点:DP2+TP16,Decode 节点:DP8+TP4" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:923 +msgid "**Input/Output**: 64k/3k" +msgstr "**输入/输出**:64k/3k" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:925 +msgid "**Performance**: 533tps, TPOT 32ms" +msgstr "**性能**:533tps,TPOT 32ms" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:927 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:929 +msgid "Run performance evaluation of `DeepSeek-V3.2-W8A8` as an example." +msgstr "以运行 `DeepSeek-V3.2-W8A8` 的性能评估为例。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:931 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "更多详情请参考 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:933 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:935 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:936 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:937 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:基准离线推理吞吐量。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:939 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例,按如下方式运行代码。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:946 +msgid "Function Call" +msgstr "函数调用" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:948 +msgid "" +"The function call feature is supported from v0.13.0rc1 on. Please use the" +" latest version." +msgstr "函数调用功能自 v0.13.0rc1 版本起支持。请使用最新版本。" + +#: ../../source/tutorials/models/DeepSeek-V3.2.md:950 +msgid "" +"Refer to [DeepSeek-V3.2 Usage " +"Guide](https://docs.vllm.ai/projects/recipes/en/latest/DeepSeek/DeepSeek-V3_2.html" +"#tool-calling-example) for details." +msgstr "详情请参阅 [DeepSeek-V3.2 使用指南](https://docs.vllm.ai/projects/recipes/en/latest/DeepSeek/DeepSeek-V3_2.html#tool-calling-example)。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM4.x.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM4.x.po new file mode 100644 index 00000000..ad3d705c --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM4.x.po @@ -0,0 +1,528 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/GLM4.x.md:1 +msgid "GLM-4.5/4.6/4.7" +msgstr "GLM-4.5/4.6/4.7" + +#: ../../source/tutorials/models/GLM4.x.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/GLM4.x.md:5 +msgid "" +"GLM-4.x series models use a Mixture-of-Experts (MoE) architecture and are" +" foundational models specifically designed for agent applications." +msgstr "GLM-4.x 系列模型采用混合专家(MoE)架构,是专为智能体应用设计的基础模型。" + +#: ../../source/tutorials/models/GLM4.x.md:7 +msgid "The `GLM-4.5` model is first supported in `vllm-ascend:v0.10.0rc1`." +msgstr "`GLM-4.5` 模型首次在 `vllm-ascend:v0.10.0rc1` 版本中得到支持。" + +#: ../../source/tutorials/models/GLM4.x.md:9 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node and multi-node deployment, accuracy and " +"performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的功能、功能配置、环境准备、单节点与多节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/GLM4.x.md:11 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/GLM4.x.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/GLM4.x.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/GLM4.x.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/GLM4.x.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/GLM4.x.md:21 +msgid "" +"`GLM-4.5`(BF16 version): [Download model " +"weight](https://www.modelscope.cn/models/ZhipuAI/GLM-4.5)." +msgstr "`GLM-4.5`(BF16 版本):[下载模型权重](https://www.modelscope.cn/models/ZhipuAI/GLM-4.5)。" + +#: ../../source/tutorials/models/GLM4.x.md:22 +msgid "" +"`GLM-4.6`(BF16 version): [Download model " +"weight](https://www.modelscope.cn/models/ZhipuAI/GLM-4.6)." +msgstr "`GLM-4.6`(BF16 版本):[下载模型权重](https://www.modelscope.cn/models/ZhipuAI/GLM-4.6)。" + +#: ../../source/tutorials/models/GLM4.x.md:23 +msgid "" +"`GLM-4.7`(BF16 version): [Download model " +"weight](https://www.modelscope.cn/models/ZhipuAI/GLM-4.7)." +msgstr "`GLM-4.7`(BF16 版本):[下载模型权重](https://www.modelscope.cn/models/ZhipuAI/GLM-4.7)。" + +#: ../../source/tutorials/models/GLM4.x.md:24 +msgid "" +"`GLM-4.5-w8a8-with-float-mtp`(Quantized version with mtp): [Download " +"model weight](https://modelers.cn/models/Modelers_Park/GLM-4.5-w8a8)." +msgstr "`GLM-4.5-w8a8-with-float-mtp`(带 mtp 的量化版本):[下载模型权重](https://modelers.cn/models/Modelers_Park/GLM-4.5-w8a8)。" + +#: ../../source/tutorials/models/GLM4.x.md:25 +msgid "" +"`GLM-4.6-w8a8`(Quantized version without mtp): [Download model " +"weight](https://modelers.cn/models/Modelers_Park/GLM-4.6-w8a8). 
Because " +"vllm do not support GLM4.6 mtp in October, so we do not provide mtp " +"version. And last month, it supported, you can use the following " +"quantization scheme to add mtp weights to Quantized weights." +msgstr "`GLM-4.6-w8a8`(不带 mtp 的量化版本):[下载模型权重](https://modelers.cn/models/Modelers_Park/GLM-4.6-w8a8)。由于 vllm 在十月份不支持 GLM4.6 的 mtp,因此我们不提供 mtp 版本。上个月已支持,您可以使用以下量化方案将 mtp 权重添加到量化权重中。" + +#: ../../source/tutorials/models/GLM4.x.md:26 +msgid "" +"`GLM-4.7-w8a8-with-float-mtp`(Quantized version without mtp): [Download " +"model weight](https://modelscope.cn/models/Eco-" +"Tech/GLM-4.7-W8A8-floatmtp)." +msgstr "`GLM-4.7-w8a8-with-float-mtp`(不带 mtp 的量化版本):[下载模型权重](https://modelscope.cn/models/Eco-Tech/GLM-4.7-W8A8-floatmtp)。" + +#: ../../source/tutorials/models/GLM4.x.md:27 +msgid "" +"`Method of Quantify`: [quantization " +"scheme](https://blog.csdn.net/qq_37368095/article/details/156429653?spm=1011.2124.3001.6209)." +" You can use these methods to quantify the model." +msgstr "`量化方法`:[量化方案](https://blog.csdn.net/qq_37368095/article/details/156429653?spm=1011.2124.3001.6209)。您可以使用这些方法对模型进行量化。" + +#: ../../source/tutorials/models/GLM4.x.md:29 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`." +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/GLM4.x.md:31 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/GLM4.x.md:33 +msgid "You can use our official docker image to run `GLM-4.x` directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 `GLM-4.x`。" + +#: ../../source/tutorials/models/GLM4.x.md +msgid "A3 series" +msgstr "A3 系列" + +#: ../../source/tutorials/models/GLM4.x.md:42 +#: ../../source/tutorials/models/GLM4.x.md:85 +msgid "Start the docker image on your each node." +msgstr "在您的每个节点上启动 docker 镜像。" + +#: ../../source/tutorials/models/GLM4.x.md +msgid "A2 series" +msgstr "A2 系列" + +#: ../../source/tutorials/models/GLM4.x.md:118 +msgid "" +"In addition, if you don't want to use the docker image as above, you can " +"also build all from source:" +msgstr "此外,如果您不想使用上述 docker 镜像,也可以从源码构建所有内容:" + +#: ../../source/tutorials/models/GLM4.x.md:120 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源码安装 `vllm-ascend`,请参考[安装指南](../../installation.md)。" + +#: ../../source/tutorials/models/GLM4.x.md:122 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." +msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/GLM4.x.md:124 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/GLM4.x.md:126 +msgid "**Notice:**" +msgstr "**注意:**" + +#: ../../source/tutorials/models/GLM4.x.md:128 +msgid "" +"We have optimized the FIA operator in CANN 8.5.1. Manual replacement of " +"the files related to the FIA operator is required. Please execute the FIA" +" operator replacement script: " +"[A2](../../../../tools/install_flash_infer_attention_score_ops_a2.sh) and" +" [A3](../../../../tools/install_flash_infer_attention_score_ops_a3.sh) " +"The optimization of the FIA operator will be enabled by default in CANN " +"9.x releases, and manual replacement will no longer be required. Please " +"stay tuned for updates to this document." 
+msgstr "我们已在 CANN 8.5.1 中优化了 FIA 算子。需要手动替换与 FIA 算子相关的文件。请执行 FIA 算子替换脚本:[A2](../../../../tools/install_flash_infer_attention_score_ops_a2.sh) 和 [A3](../../../../tools/install_flash_infer_attention_score_ops_a3.sh)。FIA 算子的优化将在 CANN 9.x 版本中默认启用,届时将不再需要手动替换。请关注本文档的更新。" + +#: ../../source/tutorials/models/GLM4.x.md:132 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/GLM4.x.md:134 +msgid "In low-latency scenarios, we recommend a single-machine deployment." +msgstr "在低延迟场景下,我们推荐单机部署。" + +#: ../../source/tutorials/models/GLM4.x.md:135 +msgid "" +"Quantized model `glm4.7_w8a8_with_float_mtp` can be deployed on 1 Atlas " +"800 A3 (64G × 16) or 1 Atlas 800 A2 (64G × 8)." +msgstr "量化模型 `glm4.7_w8a8_with_float_mtp` 可以部署在 1 台 Atlas 800 A3(64G × 16)或 1 台 Atlas 800 A2(64G × 8)上。" + +#: ../../source/tutorials/models/GLM4.x.md:137 +msgid "Run the following script to execute online inference." +msgstr "运行以下脚本以执行在线推理。" + +#: ../../source/tutorials/models/GLM4.x.md:169 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数解释如下:" + +#: ../../source/tutorials/models/GLM4.x.md:172 +msgid "" +"`--async-scheduling` Asynchronous scheduling is a technique used to " +"optimize inference efficiency. It allows non-blocking task scheduling to " +"improve concurrency and throughput, especially when processing large-" +"scale models." +msgstr "`--async-scheduling` 异步调度是一种用于优化推理效率的技术。它允许非阻塞的任务调度,以提高并发性和吞吐量,特别是在处理大规模模型时。" + +#: ../../source/tutorials/models/GLM4.x.md:173 +msgid "" +"`fusion_ops_gmmswigluquant` The performance of the GmmSwigluQuant fusion " +"operator tends to degrade when the total number of NPUs is ≤ 16." +msgstr "`fusion_ops_gmmswigluquant` 当 NPU 总数 ≤ 16 时,GmmSwigluQuant 融合算子的性能往往会下降。" + +#: ../../source/tutorials/models/GLM4.x.md:175 +msgid "Multi-node Deployment" +msgstr "多节点部署" + +#: ../../source/tutorials/models/GLM4.x.md:177 +msgid "" +"Although the former tutorial said \"Not recommended to deploy multi-node " +"on Atlas 800 A2 (64G × 8)\", but if you insist to deploy GLM-4.x model on" +" multi-node like 2 × Atlas 800 A2 (64G × 8), run the following scripts on" +" two nodes respectively." +msgstr "尽管之前的教程提到“不建议在 Atlas 800 A2(64G × 8)上部署多节点”,但如果您坚持要在类似 2 × Atlas 800 A2(64G × 8)的多节点上部署 GLM-4.x 模型,请分别在两个节点上运行以下脚本。" + +#: ../../source/tutorials/models/GLM4.x.md:179 +msgid "**Node 0**" +msgstr "**节点 0**" + +#: ../../source/tutorials/models/GLM4.x.md:230 +msgid "**Node 1**" +msgstr "**节点 1**" + +#: ../../source/tutorials/models/GLM4.x.md:283 +msgid "Prefill-Decode Disaggregation" +msgstr "Prefill-Decode 解耦部署" + +#: ../../source/tutorials/models/GLM4.x.md:285 +msgid "" +"We'd like to show the deployment guide of `GLM4.7` on multi-node " +"environment with 2P1D for better performance." +msgstr "我们将展示 `GLM4.7` 在多节点环境(2P1D)下的部署指南,以获得更好的性能。" + +#: ../../source/tutorials/models/GLM4.x.md:287 +msgid "Before you start, please" +msgstr "在开始之前,请" + +#: ../../source/tutorials/models/GLM4.x.md:289 +msgid "prepare the script `launch_online_dp.py` on each node:" +msgstr "在每个节点上准备脚本 `launch_online_dp.py`:" + +#: ../../source/tutorials/models/GLM4.x.md:392 +msgid "prepare the script `run_dp_template.sh` on each node." 
+msgstr "在每个节点上准备脚本 `run_dp_template.sh`。" + +#: ../../source/tutorials/models/GLM4.x.md:394 +#: ../../source/tutorials/models/GLM4.x.md:669 +msgid "Prefill node 0" +msgstr "Prefill 节点 0" + +#: ../../source/tutorials/models/GLM4.x.md:460 +#: ../../source/tutorials/models/GLM4.x.md:676 +msgid "Prefill node 1" +msgstr "Prefill 节点 1" + +#: ../../source/tutorials/models/GLM4.x.md:525 +#: ../../source/tutorials/models/GLM4.x.md:683 +msgid "Decode node 0" +msgstr "Decode 节点 0" + +#: ../../source/tutorials/models/GLM4.x.md:596 +#: ../../source/tutorials/models/GLM4.x.md:690 +msgid "Decode node 1" +msgstr "Decode 节点 1" + +#: ../../source/tutorials/models/GLM4.x.md:667 +msgid "" +"Once the preparation is done, you can start the server with the following" +" command on each node:" +msgstr "准备工作完成后,您可以在每个节点上使用以下命令启动服务器:" + +#: ../../source/tutorials/models/GLM4.x.md:697 +msgid "Request Forwarding" +msgstr "请求转发" + +#: ../../source/tutorials/models/GLM4.x.md:699 +msgid "" +"To set up request forwarding, run the following script on any machine. " +"You can get the proxy program in the repository's examples: " +"[load_balance_proxy_server_example.py](https://github.com/vllm-project" +"/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "要设置请求转发,请在任何机器上运行以下脚本。您可以在仓库的示例中找到代理程序:[load_balance_proxy_server_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/models/GLM4.x.md:728 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/GLM4.x.md:730 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/GLM4.x.md:749 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/GLM4.x.md:751 +msgid "Here are two accuracy evaluation methods." +msgstr "这里有两种精度评估方法。" + +#: ../../source/tutorials/models/GLM4.x.md:753 +#: ../../source/tutorials/models/GLM4.x.md:770 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/GLM4.x.md:755 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/GLM4.x.md:757 +msgid "" +"After execution, you can get the result, here is the result of `GLM4.7` " +"in `vllm-ascend:main` (after `vllm-ascend:0.14.0rc1`) for reference only." 
+msgstr "执行后,您可以获得结果,以下是 `GLM4.7` 在 `vllm-ascend:main`(`vllm-ascend:0.14.0rc1` 之后)中的结果,仅供参考。" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "note" +msgstr "备注" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "GPQA" +msgstr "GPQA" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "84.85" +msgstr "84.85" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "1 Atlas 800 A3 (64G × 16)" +msgstr "1 Atlas 800 A3 (64G × 16)" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "MATH500" +msgstr "MATH500" + +#: ../../source/tutorials/models/GLM4.x.md:87 +msgid "98.8" +msgstr "98.8" + +#: ../../source/tutorials/models/GLM4.x.md:764 +msgid "Using Language Model Evaluation Harness" +msgstr "使用语言模型评估工具" + +#: ../../source/tutorials/models/GLM4.x.md:766 +msgid "Not tested yet." +msgstr "尚未测试。" + +#: ../../source/tutorials/models/GLM4.x.md:768 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/GLM4.x.md:772 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "" +"详情请参考[使用AISBench进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/GLM4.x.md:774 +msgid "Using vLLM Benchmark" +msgstr "使用vLLM基准测试" + +#: ../../source/tutorials/models/GLM4.x.md:776 +msgid "Run performance evaluation of `GLM-4.x` as an example." +msgstr "以运行 `GLM-4.x` 的性能评估为例。" + +#: ../../source/tutorials/models/GLM4.x.md:778 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "" +"更多详情请参考 [vllm基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/GLM4.x.md:780 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 包含三个子命令:" + +#: ../../source/tutorials/models/GLM4.x.md:782 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:基准测试单批次请求的延迟。" + +#: ../../source/tutorials/models/GLM4.x.md:783 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:基准测试在线服务吞吐量。" + +#: ../../source/tutorials/models/GLM4.x.md:784 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:基准测试离线推理吞吐量。" + +#: ../../source/tutorials/models/GLM4.x.md:786 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例,运行以下代码。" + +#: ../../source/tutorials/models/GLM4.x.md:808 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." 
+msgstr "大约几分钟后,您将获得性能评估结果。" + +#: ../../source/tutorials/models/GLM4.x.md:810 +msgid "Best Practices" +msgstr "最佳实践" + +#: ../../source/tutorials/models/GLM4.x.md:812 +msgid "In this chapter, we recommend best practices for three scenarios:" +msgstr "本章节,我们针对三种场景推荐最佳实践:" + +#: ../../source/tutorials/models/GLM4.x.md:814 +msgid "" +"Long-context: For long sequences with low concurrency (≤ 4): set `dp1 " +"tp16`; For long sequences with high concurrency (> 4): set `dp2 tp8`" +msgstr "" +"长上下文:对于低并发(≤ 4)的长序列,设置 `dp1 tp16`;对于高并发(> 4)的长序列,设置 `dp2 tp8`" + +#: ../../source/tutorials/models/GLM4.x.md:815 +msgid "" +"Low-latency: For short sequences with low latency: we recommend setting " +"`dp2 tp8`" +msgstr "低延迟:对于需要低延迟的短序列,我们推荐设置 `dp2 tp8`" + +#: ../../source/tutorials/models/GLM4.x.md:816 +msgid "" +"High-throughput: For short sequences with high throughput: we also " +"recommend setting `dp2 tp8`" +msgstr "高吞吐量:对于需要高吞吐量的短序列,我们也推荐设置 `dp2 tp8`" + +#: ../../source/tutorials/models/GLM4.x.md:818 +msgid "" +"**Notice:** `max-model-len` and `max-num-seqs` need to be set according " +"to the actual usage scenario. For other settings, please refer to the " +"**[Deployment](#deployment)** chapter." +msgstr "" +"**注意:** `max-model-len` 和 `max-num-seqs` 需要根据实际使用场景进行设置。其他设置请参考 **[部署](#deployment)** 章节。" + +#: ../../source/tutorials/models/GLM4.x.md:821 +msgid "FAQ" +msgstr "常见问题" + +#: ../../source/tutorials/models/GLM4.x.md:823 +msgid "**Q: Why is the TPOT performance poor in Long-context test?**" +msgstr "**问:为什么在长上下文测试中TPOT性能不佳?**" + +#: ../../source/tutorials/models/GLM4.x.md:825 +msgid "" +"A: Please ensure that the FIA operator replacement script has been " +"executed successfully to complete the replacement of FIA operators. Here " +"is the script: " +"[A2](../../../../tools/install_flash_infer_attention_score_ops_a2.sh) and" +" [A3](../../../../tools/install_flash_infer_attention_score_ops_a3.sh)" +msgstr "" +"答:请确保已成功执行FIA算子替换脚本以完成FIA算子的替换。脚本如下:" +"[A2](../../../../tools/install_flash_infer_attention_score_ops_a2.sh) 和 " +"[A3](../../../../tools/install_flash_infer_attention_score_ops_a3.sh)" + +#: ../../source/tutorials/models/GLM4.x.md:827 +msgid "" +"**Q: Startup fails with HCCL port conflicts (address already bound). What" +" should I do?**" +msgstr "**问:启动失败,提示HCCL端口冲突(地址已被占用)。我该怎么办?**" + +#: ../../source/tutorials/models/GLM4.x.md:829 +msgid "A: Clean up old processes and restart: `pkill -f VLLM*`." +msgstr "答:清理旧进程并重启:`pkill -f VLLM*`。" + +#: ../../source/tutorials/models/GLM4.x.md:831 +msgid "**Q: How to handle OOM or unstable startup?**" +msgstr "**问:如何处理OOM或启动不稳定的问题?**" + +#: ../../source/tutorials/models/GLM4.x.md:833 +msgid "" +"A: Reduce `--max-num-seqs` and `--max-model-len` first. If needed, reduce" +" concurrency and load-testing pressure (e.g., `max-concurrency` / `num-" +"prompts`)." +msgstr "" +"答:首先减少 `--max-num-seqs` 和 `--max-model-len`。如有需要,降低并发度和负载测试压力(例如,`max-concurrency` / `num-prompts`)。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM5.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM5.po new file mode 100644 index 00000000..f25a0395 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM5.po @@ -0,0 +1,475 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/GLM5.md:1 +msgid "GLM-5" +msgstr "GLM-5" + +#: ../../source/tutorials/models/GLM5.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/GLM5.md:5 +msgid "" +"[GLM-5](https://huggingface.co/zai-org/GLM-5) use a Mixture-of-Experts " +"(MoE) architecture and targeting at complex systems engineering and long-" +"horizon agentic tasks." +msgstr "" +"[GLM-5](https://huggingface.co/zai-org/GLM-5) 采用混合专家 (Mixture-of-Experts, MoE) 架构,旨在处理复杂系统工程和长视野智能体任务。" + +#: ../../source/tutorials/models/GLM5.md:7 +msgid "" +"The `GLM-5` model is first supported in `vllm-ascend:v0.17.0rc1`. In " +"`vllm-ascend:v0.17.0rc1` and `vllm-ascend:v0.18.0rc1` , the version of " +"transformers need to be upgraded to 5.2.0." +msgstr "" +"`GLM-5` 模型首次在 `vllm-ascend:v0.17.0rc1` 版本中得到支持。在 `vllm-ascend:v0.17.0rc1` 和 `vllm-ascend:v0.18.0rc1` 版本中,需要将 transformers 的版本升级到 5.2.0。" + +#: ../../source/tutorials/models/GLM5.md:9 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node and multi-node deployment, accuracy and " +"performance evaluation." +msgstr "" +"本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单节点和多节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/GLM5.md:11 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/GLM5.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "" +"请参考[支持的特性](../../user_guide/support_matrix/supported_models.md)以获取模型支持的特性矩阵。" + +#: ../../source/tutorials/models/GLM5.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "" +"请参考[特性指南](../../user_guide/feature_guide/index.md)以获取特性的配置方法。" + +#: ../../source/tutorials/models/GLM5.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/GLM5.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/GLM5.md:21 +msgid "" +"`GLM-5`(BF16 version): [Download model " +"weight](https://www.modelscope.cn/models/ZhipuAI/GLM-5)." +msgstr "" +"`GLM-5` (BF16 版本): [下载模型权重](https://www.modelscope.cn/models/ZhipuAI/GLM-5)。" + +#: ../../source/tutorials/models/GLM5.md:22 +msgid "" +"`GLM-5-w4a8`: [Download model weight](https://modelscope.cn/models/Eco-" +"Tech/GLM-5-w4a8)." +msgstr "" +"`GLM-5-w4a8`: [下载模型权重](https://modelscope.cn/models/Eco-Tech/GLM-5-w4a8)。" + +#: ../../source/tutorials/models/GLM5.md:23 +msgid "" +"`GLM-5-w8a8`: [Download model weight](https://www.modelscope.cn/models" +"/Eco-Tech/GLM-5-w8a8)." +msgstr "" +"`GLM-5-w8a8`: [下载模型权重](https://www.modelscope.cn/models/Eco-Tech/GLM-5-w8a8)。" + +#: ../../source/tutorials/models/GLM5.md:24 +msgid "" +"You can use [msmodelslim](https://gitcode.com/Ascend/msmodelslim) to " +"quantify the model naively." 
+msgstr "" +"您可以使用 [msmodelslim](https://gitcode.com/Ascend/msmodelslim) 对模型进行简单的量化。" + +#: ../../source/tutorials/models/GLM5.md:26 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "" +"建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/GLM5.md:28 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/GLM5.md:30 +msgid "You can use our official docker image to run GLM-5 directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 GLM-5。" + +#: ../../source/tutorials/models/GLM5.md +msgid "A3 series" +msgstr "A3 系列" + +#: ../../source/tutorials/models/GLM5.md:39 +#: ../../source/tutorials/models/GLM5.md:86 +msgid "Start the docker image on your each node." +msgstr "在您的每个节点上启动 docker 镜像。" + +#: ../../source/tutorials/models/GLM5.md +msgid "A2 series" +msgstr "A2 系列" + +#: ../../source/tutorials/models/GLM5.md:119 +msgid "" +"In addition, if you don't want to use the docker image as above, you can " +"also build all from source:" +msgstr "此外,如果您不想使用上述的 docker 镜像,也可以从源码构建所有组件:" + +#: ../../source/tutorials/models/GLM5.md:121 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](https://docs.vllm.ai/projects/ascend/en/latest/installation.html)." +msgstr "" +"从源码安装 `vllm-ascend`,请参考[安装指南](https://docs.vllm.ai/projects/ascend/en/latest/installation.html)。" + +#: ../../source/tutorials/models/GLM5.md:123 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." +msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/GLM5.md:125 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/GLM5.md:127 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/GLM5.md:136 +msgid "" +"Quantized model `glm-5-w4a8` can be deployed on 1 Atlas 800 A3 (64G × 16)" +" ." +msgstr "量化模型 `glm-5-w4a8` 可以部署在 1 台 Atlas 800 A3 (64G × 16) 上。" + +#: ../../source/tutorials/models/GLM5.md:138 +#: ../../source/tutorials/models/GLM5.md:173 +#: ../../source/tutorials/models/GLM5.md:213 +msgid "Run the following script to execute online inference." +msgstr "运行以下脚本来执行在线推理。" + +#: ../../source/tutorials/models/GLM5.md:171 +msgid "" +"Quantized model `glm-5-w8a8` can be deployed on 1 Atlas 800 A3 (64G × 16)" +" ." +msgstr "量化模型 `glm-5-w8a8` 可以部署在 1 台 Atlas 800 A3 (64G × 16) 上。" + +#: ../../source/tutorials/models/GLM5.md:211 +msgid "Quantized model `glm-5-w4a8` can be deployed on 1 Atlas 800 A2 (64G × 8) ." +msgstr "量化模型 `glm-5-w4a8` 可以部署在 1 台 Atlas 800 A2 (64G × 8) 上。" + +#: ../../source/tutorials/models/GLM5.md:248 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数解释如下:" + +#: ../../source/tutorials/models/GLM5.md:251 +msgid "" +"For single-node deployment, we recommend using `dp1tp16` and turn off " +"expert parallel in low-latency scenarios." +msgstr "对于单节点部署,在低延迟场景下,我们建议使用 `dp1tp16` 并关闭专家并行。" + +#: ../../source/tutorials/models/GLM5.md:252 +msgid "" +"`--async-scheduling` Asynchronous scheduling is a technique used to " +"optimize inference efficiency. It allows non-blocking task scheduling to " +"improve concurrency and throughput, especially when processing large-" +"scale models." 
+msgstr "`--async-scheduling` 异步调度是一种用于优化推理效率的技术。它允许非阻塞的任务调度,以提高并发性和吞吐量,尤其是在处理大规模模型时。" + +#: ../../source/tutorials/models/GLM5.md:254 +msgid "Multi-node Deployment" +msgstr "多节点部署" + +#: ../../source/tutorials/models/GLM5.md:256 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" + +#: ../../source/tutorials/models/GLM5.md:265 +msgid "`glm-5-bf16`: require at least 2 Atlas 800 A3 (64G × 16)." +msgstr "`glm-5-bf16`: 需要至少 2 台 Atlas 800 A3 (64G × 16)。" + +#: ../../source/tutorials/models/GLM5.md:267 +#: ../../source/tutorials/models/GLM5.md:363 +#: ../../source/tutorials/models/GLM5.md:528 +msgid "Run the following scripts on two nodes respectively." +msgstr "分别在两个节点上运行以下脚本。" + +#: ../../source/tutorials/models/GLM5.md:269 +#: ../../source/tutorials/models/GLM5.md:365 +#: ../../source/tutorials/models/GLM5.md:530 +msgid "**node 0**" +msgstr "**节点 0**" + +#: ../../source/tutorials/models/GLM5.md:313 +#: ../../source/tutorials/models/GLM5.md:411 +#: ../../source/tutorials/models/GLM5.md:580 +msgid "**node 1**" +msgstr "**节点 1**" + +#: ../../source/tutorials/models/GLM5.md:461 +msgid "" +"For bf16 weight, use this script on each node to enable [Multi Token " +"Prediction " +"(MTP)](../../user_guide/feature_guide/Multi_Token_Prediction.md)." +msgstr "对于 bf16 权重,在每个节点上使用此脚本来启用[多令牌预测 (MTP)](../../user_guide/feature_guide/Multi_Token_Prediction.md)。" + +#: ../../source/tutorials/models/GLM5.md:526 +msgid "`glm-5-w8a8`: require 2 Atlas 800 A3 (64G × 16)." +msgstr "`glm-5-w8a8`: 需要 2 台 Atlas 800 A3 (64G × 16)。" + +#: ../../source/tutorials/models/GLM5.md:634 +msgid "Prefill-Decode Disaggregation" +msgstr "Prefill-Decode 解耦部署" + +#: ../../source/tutorials/models/GLM5.md:636 +msgid "" +"We'd like to show the deployment guide of `GLM-5` on multi-node " +"environment with 1P1D for better performance." +msgstr "我们将展示 `GLM-5` 在多节点环境下采用 1P1D 模式以获得更好性能的部署指南。" + +#: ../../source/tutorials/models/GLM5.md:638 +msgid "Before you start, please" +msgstr "在开始之前,请" + +#: ../../source/tutorials/models/GLM5.md:640 +msgid "prepare the script `launch_online_dp.py` on each node:" +msgstr "在每个节点上准备脚本 `launch_online_dp.py`:" + +#: ../../source/tutorials/models/GLM5.md:743 +msgid "prepare the script `run_dp_template.sh` on each node." +msgstr "在每个节点上准备脚本 `run_dp_template.sh`。" + +#: ../../source/tutorials/models/GLM5.md:745 +msgid "" +"To support a 200k context window on the stage of prefill, the parameter " +"`\"layer_sharding\": [\"q_b_proj\"]` needs to be added to " +"`--additional_config` on each prefill node. In PD-disaggregated " +"deployment, `layer_sharding` is supported only on prefill/P nodes with " +"`kv_role=\"kv_producer\"`; do not enable it on decode/D nodes or " +"`kv_role=\"kv_both\"` nodes." 
+msgstr "为了在预填充阶段支持 200k 的上下文窗口,需要在每个预填充节点的 `--additional_config` 中添加参数 `\"layer_sharding\": [\"q_b_proj\"]`。在 PD 解耦部署中,`layer_sharding` 仅在 `kv_role=\"kv_producer\"` 的预填充/P 节点上受支持;不要在解码/D 节点或 `kv_role=\"kv_both\"` 的节点上启用它。" + +#: ../../source/tutorials/models/GLM5.md:747 +#: ../../source/tutorials/models/GLM5.md:1233 +msgid "Prefill node 0" +msgstr "预填充节点 0" + +#: ../../source/tutorials/models/GLM5.md:826 +#: ../../source/tutorials/models/GLM5.md:1240 +msgid "Prefill node 1" +msgstr "预填充节点 1" + +#: ../../source/tutorials/models/GLM5.md:906 +#: ../../source/tutorials/models/GLM5.md:1247 +msgid "Decode node 0" +msgstr "解码节点 0" + +#: ../../source/tutorials/models/GLM5.md:988 +#: ../../source/tutorials/models/GLM5.md:1254 +msgid "Decode node 1" +msgstr "解码节点 1" + +#: ../../source/tutorials/models/GLM5.md:1069 +#: ../../source/tutorials/models/GLM5.md:1261 +msgid "Decode node 2" +msgstr "解码节点 2" + +#: ../../source/tutorials/models/GLM5.md:1150 +#: ../../source/tutorials/models/GLM5.md:1268 +msgid "Decode node 3" +msgstr "解码节点 3" + +#: ../../source/tutorials/models/GLM5.md:1231 +msgid "" +"Once the preparation is done, you can start the server with the following" +" command on each node:" +msgstr "准备工作完成后,您可以在每个节点上使用以下命令启动服务器:" + +#: ../../source/tutorials/models/GLM5.md:1275 +msgid "Request Forwarding" +msgstr "请求转发" + +#: ../../source/tutorials/models/GLM5.md:1277 +msgid "" +"To set up request forwarding, run the following script on any machine. " +"You can get the proxy program in the repository's examples: " +"[load_balance_proxy_server_example.py](https://github.com/vllm-project" +"/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "要设置请求转发,请在任何机器上运行以下脚本。您可以在仓库的示例中找到代理程序:[load_balance_proxy_server_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/models/GLM5.md:1318 +msgid "**Notice:**" +msgstr "**注意:**" + +#: ../../source/tutorials/models/GLM5.md:1320 +msgid "Some configurations for optimization are shown below:" +msgstr "以下是一些用于优化的配置:" + +#: ../../source/tutorials/models/GLM5.md:1322 +msgid "" +"`VLLM_ASCEND_ENABLE_FLASHCOMM1`: Enable FlashComm optimization to reduce " +"communication and computation overhead on prefill node. With FlashComm " +"enabled, layer_sharding list cannot include o_proj as an element." +msgstr "`VLLM_ASCEND_ENABLE_FLASHCOMM1`: 启用 FlashComm 优化以减少预填充节点上的通信和计算开销。启用 FlashComm 后,layer_sharding 列表不能包含 o_proj 作为元素。" + +#: ../../source/tutorials/models/GLM5.md:1323 +msgid "" +"`VLLM_ASCEND_ENABLE_FUSED_MC2`: Enable following fused operators: " +"dispatch_gmm_combine_decode and dispatch_ffn_combine operator." +msgstr "`VLLM_ASCEND_ENABLE_FUSED_MC2`: 启用以下融合算子:dispatch_gmm_combine_decode 和 dispatch_ffn_combine 算子。" + +#: ../../source/tutorials/models/GLM5.md:1324 +msgid "`VLLM_ASCEND_ENABLE_MLAPO`: Enable fused operator MlaPreprocessOperation." 
+msgstr "`VLLM_ASCEND_ENABLE_MLAPO`: 启用融合算子 MlaPreprocessOperation。" + +#: ../../source/tutorials/models/GLM5.md:1326 +msgid "" +"Please refer to the following python file for further explanation and " +"restrictions of the environment variables above: " +"[envs.py](https://github.com/vllm-project/vllm-" +"ascend/blob/main/vllm_ascend/envs.py)" +msgstr "有关上述环境变量的进一步解释和限制,请参考以下 python 文件:[envs.py](https://github.com/vllm-project/vllm-ascend/blob/main/vllm_ascend/envs.py)" + +#: ../../source/tutorials/models/GLM5.md:1328 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/GLM5.md:1330 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/GLM5.md:1343 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/GLM5.md:1345 +msgid "Here are two accuracy evaluation methods." +msgstr "以下是两种精度评估方法。" + +#: ../../source/tutorials/models/GLM5.md:1347 +#: ../../source/tutorials/models/GLM5.md:1359 +msgid "Using AISBench" +msgstr "使用AISBench" + +#: ../../source/tutorials/models/GLM5.md:1349 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/GLM5.md:1351 +msgid "After execution, you can get the result." +msgstr "执行后,您将获得结果。" + +#: ../../source/tutorials/models/GLM5.md:1353 +msgid "Using Language Model Evaluation Harness" +msgstr "使用Language Model Evaluation Harness" + +#: ../../source/tutorials/models/GLM5.md:1355 +msgid "Not tested yet." +msgstr "尚未测试。" + +#: ../../source/tutorials/models/GLM5.md:1357 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/GLM5.md:1361 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参考[使用AISBench进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/GLM5.md:1363 +msgid "Using vLLM Benchmark" +msgstr "使用vLLM基准测试" + +#: ../../source/tutorials/models/GLM5.md:1365 +msgid "" +"Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/contributing/benchmarks.html) " +"for more details." +msgstr "更多详情请参考[vllm基准测试](https://docs.vllm.ai/en/latest/contributing/benchmarks.html)。" + +#: ../../source/tutorials/models/GLM5.md:1367 +msgid "Best Practices" +msgstr "最佳实践" + +#: ../../source/tutorials/models/GLM5.md:1369 +msgid "" +"In this chapter, we recommend best practices in prefill-decode " +"disaggregation scenario with 1P1D architecture using 4 Atlas 800 A3 (64G " +"× 16):" +msgstr "本章节,我们推荐在使用4台Atlas 800 A3(64G × 16)的1P1D架构下,预填充-解码分离场景的最佳实践:" + +#: ../../source/tutorials/models/GLM5.md:1371 +msgid "" +"Low-latency: We recommend setting `dp4 tp8` on prefill nodes and `dp4 " +"tp8` on decode nodes for low latency situation." +msgstr "低延迟场景:对于低延迟场景,我们建议在预填充节点上设置`dp4 tp8`,在解码节点上设置`dp4 tp8`。" + +#: ../../source/tutorials/models/GLM5.md:1372 +msgid "" +"High-throughput: `dp4 tp8` on prefill nodes and `dp8 tp4` on decode nodes" +" is recommended for high throughput situation." +msgstr "高吞吐场景:对于高吞吐场景,建议在预填充节点上设置`dp4 tp8`,在解码节点上设置`dp8 tp4`。" + +#: ../../source/tutorials/models/GLM5.md:1374 +msgid "" +"**Notice:** `max-model-len` and `max-num-seqs` need to be set according " +"to the actual usage scenario. 
For other settings, please refer to the " +"**[Deployment](#deployment)** chapter." +msgstr "**注意:** `max-model-len`和`max-num-seqs`需要根据实际使用场景进行设置。其他设置请参考**[部署](#deployment)**章节。" + +#: ../../source/tutorials/models/GLM5.md:1377 +msgid "FAQ" +msgstr "常见问题" + +#: ../../source/tutorials/models/GLM5.md:1379 +msgid "" +"**Q: How to solve ValueError: Tokenizer class TokenizersBackend does not " +"exist or is not currently imported?**" +msgstr "**问:如何解决ValueError: Tokenizer class TokenizersBackend does not exist or is not currently imported?**" + +#: ../../source/tutorials/models/GLM5.md:1381 +msgid "A: Please update the version of transformers to 5.2.0" +msgstr "答:请将transformers版本更新至5.2.0" + +#: ../../source/tutorials/models/GLM5.md:1383 +msgid "**Q: How to enable function calling for GLM-5?**" +msgstr "**问:如何为GLM-5启用函数调用功能?**" + +#: ../../source/tutorials/models/GLM5.md:1385 +msgid "A: Please add following configurations in vLLM startup command" +msgstr "答:请在vLLM启动命令中添加以下配置" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2-Thinking.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2-Thinking.po new file mode 100644 index 00000000..0d28c10c --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2-Thinking.po @@ -0,0 +1,134 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:1 +msgid "Kimi-K2-Thinking" +msgstr "Kimi-K2-Thinking" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:5 +msgid "" +"Kimi-K2-Thinking is a large-scale Mixture-of-Experts (MoE) model " +"developed by Moonshot AI. It features a hybrid thinking architecture that" +" excels in complex reasoning and problem-solving tasks." +msgstr "Kimi-K2-Thinking 是由 Moonshot AI 开发的大规模专家混合模型。它采用混合思维架构,在复杂推理和问题解决任务中表现出色。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, environment preparation, single-node " +"deployment, and functional verification." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的功能、环境准备、单节点部署和功能验证。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:9 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:11 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:13 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." 
+msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:15 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:17 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:19 +msgid "" +"`Kimi-K2-Thinking`(bfloat16): require 1 Atlas 800 A3 (64G × 16) node. " +"[Download model " +"weight](https://huggingface.co/moonshotai/Kimi-K2-Thinking)." +msgstr "`Kimi-K2-Thinking`(bfloat16):需要 1 个 Atlas 800 A3 (64G × 16) 节点。[下载模型权重](https://huggingface.co/moonshotai/Kimi-K2-Thinking)。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:21 +msgid "" +"It is recommended to download the model weight to the shared directory, " +"such as `/mnt/sfs_turbo/.cache/`." +msgstr "建议将模型权重下载到共享目录,例如 `/mnt/sfs_turbo/.cache/`。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:23 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:25 +msgid "You can use our official docker image to run `Kimi-K2-Thinking` directly." +msgstr "您可以使用我们的官方 Docker 镜像直接运行 `Kimi-K2-Thinking`。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:27 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考[使用 Docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:29 +msgid "Run with Docker" +msgstr "使用 Docker 运行" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:72 +msgid "Verify the Quantized Model" +msgstr "验证量化模型" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:74 +msgid "" +"Please be advised to edit the value of " +"`\"quantization_config.config_groups.group_0.targets\"` from " +"`[\"Linear\"]` into `[\"MoE\"]` in `config.json` of original model " +"downloaded from [Hugging " +"Face](https://huggingface.co/moonshotai/Kimi-K2-Thinking)." +msgstr "请注意,请将从 [Hugging Face](https://huggingface.co/moonshotai/Kimi-K2-Thinking) 下载的原始模型的 `config.json` 文件中的 `\"quantization_config.config_groups.group_0.targets\"` 值从 `[\"Linear\"]` 修改为 `[\"MoE\"]`。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:90 +msgid "Your model files look like:" +msgstr "您的模型文件应类似如下结构:" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:109 +msgid "Online Inference on Multi-NPU" +msgstr "多 NPU 在线推理" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:111 +msgid "Run the following script to start the vLLM server on Multi-NPU:" +msgstr "运行以下脚本以在多 NPU 上启动 vLLM 服务器:" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:113 +msgid "" +"For an Atlas 800 A3 (64G*16) node, tensor-parallel-size should be at " +"least 16." +msgstr "对于 Atlas 800 A3 (64G*16) 节点,张量并行大小应至少为 16。" + +#: ../../source/tutorials/models/Kimi-K2-Thinking.md:136 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "服务器启动后,您可以使用输入提示词查询模型。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po new file mode 100644 index 00000000..d6a064d6 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po @@ -0,0 +1,582 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Kimi-K2.5.md:1 +msgid "Kimi-K2.5" +msgstr "Kimi-K2.5" + +#: ../../source/tutorials/models/Kimi-K2.5.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Kimi-K2.5.md:5 +msgid "" +"Kimi K2.5 is an open-source, native multimodal agentic model built " +"through continual pretraining on approximately 15 trillion mixed visual " +"and text tokens atop Kimi-K2-Base. It seamlessly integrates vision and " +"language understanding with advanced agentic capabilities, instant and " +"thinking modes, as well as conversational and agentic paradigms." +msgstr "" +"Kimi K2.5 是一个开源的、原生的多模态智能体模型,通过在 Kimi-K2-Base 基础上持续预训练约 15 万亿视觉和文本混合令牌构建而成。它无缝集成了视觉与语言理解能力、先进的智能体能力、即时与思考模式,以及对话式和智能体范式。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:7 +msgid "The `Kimi-K2.5` model is first supported in `vllm-ascend:v0.17.0rc1`." +msgstr "`Kimi-K2.5` 模型首次在 `vllm-ascend:v0.17.0rc1` 版本中得到支持。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:9 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node and multi-node deployment, accuracy and " +"performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单节点与多节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:11 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/Kimi-K2.5.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考 [支持的特性](../../user_guide/support_matrix/supported_models.md) 获取模型支持的特性矩阵。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考 [特性指南](../../user_guide/feature_guide/index.md) 获取特性的配置信息。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Kimi-K2.5.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Kimi-K2.5.md:21 +msgid "" +"`Kimi-K2.5-w4a8`(Quantized version for w4a8): [Download model " +"weight](https://modelscope.cn/models/Eco-Tech/Kimi-K2.5-W4A8)." +msgstr "`Kimi-K2.5-w4a8`(w4a8量化版本):[下载模型权重](https://modelscope.cn/models/Eco-Tech/Kimi-K2.5-W4A8)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:22 +msgid "" +"`kimi-k2.5-eagle3`(Eagle3 MTP draft model for accelerating inference of " +"Kimi-K2.5): [Download model " +"weight](https://huggingface.co/lightseekorg/kimi-k2.5-eagle3)" +msgstr "`kimi-k2.5-eagle3`(用于加速 Kimi-K2.5 推理的 Eagle3 MTP 草稿模型):[下载模型权重](https://huggingface.co/lightseekorg/kimi-k2.5-eagle3)" + +#: ../../source/tutorials/models/Kimi-K2.5.md:24 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`." 
+msgstr "建议将模型权重下载到多节点的共享目录中,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:26 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/Kimi-K2.5.md:28 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "如果您想部署多节点环境,需要根据 [验证多节点通信环境](../../installation.md#verify-multi-node-communication) 验证多节点通信。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:30 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Kimi-K2.5.md:32 +msgid "You can use our official docker image to run `Kimi-K2.5` directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 `Kimi-K2.5`。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:34 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像,并在节点上启动 docker 镜像,请参考 [使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md +msgid "A3 series" +msgstr "A3 系列" + +#: ../../source/tutorials/models/Kimi-K2.5.md:43 +#: ../../source/tutorials/models/Kimi-K2.5.md:86 +msgid "Start the docker image on your each node." +msgstr "在您的每个节点上启动 docker 镜像。" + +#: ../../source/tutorials/models/Kimi-K2.5.md +msgid "A2 series" +msgstr "A2 系列" + +#: ../../source/tutorials/models/Kimi-K2.5.md:119 +msgid "" +"In addition, if you don't want to use the docker image as above, you can " +"also build all from source:" +msgstr "此外,如果您不想使用上述 docker 镜像,也可以从源码构建所有内容:" + +#: ../../source/tutorials/models/Kimi-K2.5.md:121 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源码安装 `vllm-ascend`,请参考 [安装](../../installation.md)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:123 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." +msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:125 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Kimi-K2.5.md:127 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/Kimi-K2.5.md:129 +msgid "" +"Quantized model `Kimi-K2.5-w4a8` can be deployed on 1 Atlas 800 A3 (64G ×" +" 16)." +msgstr "量化模型 `Kimi-K2.5-w4a8` 可以部署在 1 台 Atlas 800 A3(64G × 16)上。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:131 +msgid "Run the following script to execute online inference." +msgstr "运行以下脚本执行在线推理。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:176 +#: ../../source/tutorials/models/Kimi-K2.5.md:645 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数解释如下:" + +#: ../../source/tutorials/models/Kimi-K2.5.md:179 +msgid "" +"Setting the environment variable `VLLM_ASCEND_BALANCE_SCHEDULING=1` " +"enables balance scheduling. This may help increase output throughput and " +"reduce TPOT in v1 scheduler. However, TTFT may degrade in some scenarios." +" Furthermore, enabling this feature is not recommended in scenarios where" +" PD is separated." +msgstr "设置环境变量 `VLLM_ASCEND_BALANCE_SCHEDULING=1` 启用均衡调度。这可能有助于提高 v1 调度器中的输出吞吐量并降低 TPOT。然而,在某些场景下 TTFT 可能会下降。此外,在 PD 分离的场景中不建议启用此功能。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:180 +msgid "" +"For single-node deployment, we recommend using `dp4tp4` instead of " +"`dp2tp8`." 
+msgstr "对于单节点部署,我们建议使用 `dp4tp4` 而不是 `dp2tp8`。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:181 +msgid "" +"`--max-model-len` specifies the maximum context length - that is, the sum" +" of input and output tokens for a single request. For performance testing" +" with an input length of 3.5K and output length of 1.5K, a value of " +"`16384` is sufficient, however, for precision testing, please set it at " +"least `35000`." +msgstr "`--max-model-len` 指定最大上下文长度——即单个请求的输入和输出令牌总数。对于输入长度 3.5K 和输出长度 1.5K 的性能测试,`16384` 的值就足够了,但对于精度测试,请至少将其设置为 `35000`。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:182 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. " +"To enable it, remove this option." +msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,请移除此选项。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:183 +msgid "" +"`--mm-encoder-tp-mode` indicates how to optimize multi-modal encoder " +"inference using tensor parallelism (TP). If you want to test the " +"multimodal inputs, we recommend using `data`." +msgstr "`--mm-encoder-tp-mode` 指示如何使用张量并行(TP)优化多模态编码器推理。如果您想测试多模态输入,我们建议使用 `data`。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:184 +msgid "" +"If you use the w4a8 weight, more memory will be allocated to kvcache, and" +" you can try to increase system throughput to achieve greater throughput." +msgstr "如果您使用 w4a8 权重,将有更多内存分配给 kvcache,您可以尝试增加系统吞吐量以实现更高的吞吐量。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:186 +msgid "Multi-node Deployment" +msgstr "多节点部署" + +#: ../../source/tutorials/models/Kimi-K2.5.md:188 +msgid "`Kimi-K2.5-w4a8`: require at least 2 Atlas 800 A2 (64G × 8)." +msgstr "`Kimi-K2.5-w4a8`:需要至少 2 台 Atlas 800 A2(64G × 8)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:190 +msgid "Run the following scripts on two nodes respectively." +msgstr "分别在两个节点上运行以下脚本。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:192 +msgid "**Node 0**" +msgstr "**节点 0**" + +#: ../../source/tutorials/models/Kimi-K2.5.md:256 +msgid "**Node 1**" +msgstr "**节点 1**" + +#: ../../source/tutorials/models/Kimi-K2.5.md:322 +msgid "Prefill-Decode Disaggregation" +msgstr "Prefill-Decode 分离" + +#: ../../source/tutorials/models/Kimi-K2.5.md:324 +msgid "" +"We recommend using Mooncake for deployment: " +"[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)." +msgstr "我们建议使用 Mooncake 进行部署:[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:326 +msgid "" +"Take Atlas 800 A3 (64G × 16) for example, we recommend to deploy 2P1D (4 " +"nodes) rather than 1P1D (2 nodes), because there is no enough NPU memory " +"to serve high concurrency in 1P1D case." +msgstr "以 Atlas 800 A3(64G × 16)为例,我们建议部署 2P1D(4 个节点)而不是 1P1D(2 个节点),因为在 1P1D 情况下没有足够的 NPU 内存来服务高并发。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:328 +msgid "`Kimi-K2.5-w4a8 2P1D` require 4 Atlas 800 A3 (64G × 16)." +msgstr "`Kimi-K2.5-w4a8 2P1D` 需要 4 台 Atlas 800 A3(64G × 16)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:330 +msgid "" +"To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need " +"to deploy a `launch_dp_program.py` script and a `run_dp_template.sh` " +"script on each node and deploy a `proxy.sh` script on prefill master node" +" to forward requests." 
+msgstr "要运行 vllm-ascend `Prefill-Decode Disaggregation` 服务,您需要在每个节点上部署一个 `launch_dp_program.py` 脚本和一个 `run_dp_template.sh` 脚本,并在 prefill 主节点上部署一个 `proxy.sh` 脚本来转发请求。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:332 +msgid "" +"`launch_online_dp.py` to launch external dp vllm servers. " +"[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-" +"ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" +msgstr "`launch_online_dp.py` 用于启动外部 dp vllm 服务器。[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" + +#: ../../source/tutorials/models/Kimi-K2.5.md:335 +msgid "Prefill Node 0 `run_dp_template.sh` script" +msgstr "Prefill 节点 0 `run_dp_template.sh` 脚本" + +#: ../../source/tutorials/models/Kimi-K2.5.md:413 +msgid "Prefill Node 1 `run_dp_template.sh` script" +msgstr "Prefill 节点 1 `run_dp_template.sh` 脚本" + +#: ../../source/tutorials/models/Kimi-K2.5.md:491 +msgid "Decode Node 0 `run_dp_template.sh` script" +msgstr "Decode 节点 0 `run_dp_template.sh` 脚本" + +#: ../../source/tutorials/models/Kimi-K2.5.md:568 +msgid "Decode Node 1 `run_dp_template.sh` script" +msgstr "Decode 节点 1 `run_dp_template.sh` 脚本" + +#: ../../source/tutorials/models/Kimi-K2.5.md:648 +msgid "" +"`VLLM_ASCEND_ENABLE_FLASHCOMM1=1`: enables the communication optimization" +" function on the prefill nodes." +msgstr "`VLLM_ASCEND_ENABLE_FLASHCOMM1=1`:在 prefill 节点上启用通信优化功能。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:649 +msgid "" +"`VLLM_ASCEND_ENABLE_MLAPO=1`: enables the fusion operator, which can " +"significantly improve performance but consumes more NPU memory. In the " +"Prefill-Decode (PD) separation scenario, enable MLAPO only on decode " +"nodes." +msgstr "`VLLM_ASCEND_ENABLE_MLAPO=1`:启用融合算子,这可以显著提高性能但会消耗更多 NPU 内存。在 Prefill-Decode(PD)分离场景中,仅在 decode 节点上启用 MLAPO。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:650 +msgid "" +"`--async-scheduling`: enables the asynchronous scheduling function. When " +"Multi-Token Prediction (MTP) is enabled, asynchronous scheduling of " +"operator delivery can be implemented to overlap the operator delivery " +"latency." +msgstr "`--async-scheduling`:启用异步调度功能。当启用多令牌预测(MTP)时,可以实现算子交付的异步调度以重叠算子交付延迟。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:651 +msgid "" +"`cudagraph_capture_sizes`: The recommended value is `n x (mtp + 1)`. And " +"the min is `n = 1` and the max is `n = max-num-seqs`. For other values, " +"it is recommended to set them to the number of frequently occurring " +"requests on the Decode (D) node." +msgstr "`cudagraph_capture_sizes`:推荐值为 `n x (mtp + 1)`。最小值为 `n = 1`,最大值为 `n = max-num-seqs`。对于其他值,建议将其设置为 Decode(D)节点上频繁出现的请求数量。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:652 +msgid "" +"`recompute_scheduler_enable: true`: enables the recomputation scheduler. " +"When the Key-Value Cache (KV Cache) of the decode node is insufficient, " +"requests will be sent to the prefill node to recompute the KV Cache. In " +"the PD separation scenario, it is recommended to enable this " +"configuration on both prefill and decode nodes simultaneously." 
+msgstr "`recompute_scheduler_enable: true`:启用重计算调度器。当 decode 节点的键值缓存(KV Cache)不足时,请求将被发送到 prefill 节点以重新计算 KV Cache。在 PD 分离场景中,建议同时在 prefill 和 decode 节点上启用此配置。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:653 +msgid "" +"`multistream_overlap_shared_expert: true`: When the Tensor Parallelism " +"(TP) size is 1 or `enable_shared_expert_dp: true`, an additional stream " +"is enabled to overlap the computation process of shared experts for " +"improved efficiency." +msgstr "`multistream_overlap_shared_expert: true`:当张量并行(TP)大小为 1 或 `enable_shared_expert_dp: true` 时,启用额外的流来重叠共享专家的计算过程以提高效率。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:655 +msgid "run server for each node:" +msgstr "为每个节点运行服务器:" + +#: ../../source/tutorials/models/Kimi-K2.5.md:668 +msgid "Run the `proxy.sh` script on the prefill master node" +msgstr "在 prefill 主节点上运行 `proxy.sh` 脚本" + +#: ../../source/tutorials/models/Kimi-K2.5.md:670 +msgid "" +"Run a proxy server on the same node with the prefiller service instance. " +"You can get the proxy program in the repository's examples: " +"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "在与 prefiller 服务实例相同的节点上运行一个代理服务器。您可以在仓库的示例中找到代理程序:[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/models/Kimi-K2.5.md:726 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Kimi-K2.5.md:728 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "一旦您的服务器启动,您就可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Kimi-K2.5.md:749 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Kimi-K2.5.md:751 +msgid "Here are two accuracy evaluation methods." +msgstr "以下是两种精度评估方法。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:753 +#: ../../source/tutorials/models/Kimi-K2.5.md:768 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Kimi-K2.5.md:755 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考 [使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:757 +msgid "" +"After execution, you can get the result, here is the result of " +"`Kimi-K2.5-w4a8` in `vllm-ascend:v0.18.0rc1` for reference only." 
+msgstr "执行后,您将获得结果。以下为 `Kimi-K2.5-w4a8` 在 `vllm-ascend:v0.18.0rc1` 环境下的结果,仅供参考。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "note" +msgstr "备注" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "GSM8K" +msgstr "GSM8K" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "96.07" +msgstr "96.07" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "1 Atlas 800 A3 (64G × 16)" +msgstr "1 Atlas 800 A3 (64G × 16)" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "AIME2025" +msgstr "AIME2025" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "90.00" +msgstr "90.00" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "GPQA" +msgstr "GPQA" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "84.85" +msgstr "84.85" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "TextVQA" +msgstr "TextVQA" + +#: ../../source/tutorials/models/Kimi-K2.5.md:88 +msgid "80.29" +msgstr "80.29" + +#: ../../source/tutorials/models/Kimi-K2.5.md:766 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Kimi-K2.5.md:770 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参考 [使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:772 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/Kimi-K2.5.md:774 +msgid "Run performance evaluation of `Kimi-K2.5-w4a8` as an example." +msgstr "以运行 `Kimi-K2.5-w4a8` 的性能评估为例。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:776 +msgid "" +"Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/contributing/benchmarks.html) " +"for more details." +msgstr "更多详情请参考 [vllm benchmark](https://docs.vllm.ai/en/latest/contributing/benchmarks.html)。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:778 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 包含三个子命令:" + +#: ../../source/tutorials/models/Kimi-K2.5.md:780 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:781 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:782 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:784 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。运行以下代码。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:791 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." 
+msgstr "大约几分钟后,您将获得性能评估结果。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:793 +msgid "Best Practices" +msgstr "最佳实践" + +#: ../../source/tutorials/models/Kimi-K2.5.md:795 +msgid "In this chapter, we recommend best practices for three scenarios:" +msgstr "本章节针对三种场景推荐最佳实践:" + +#: ../../source/tutorials/models/Kimi-K2.5.md:797 +msgid "" +"Long-context: For long sequences with low concurrency (≤ 4): set `dp1 " +"tp16`; For long sequences with high concurrency (> 4): set `dp2 tp8`" +msgstr "长上下文:对于低并发(≤ 4)的长序列:设置 `dp1 tp16`;对于高并发(> 4)的长序列:设置 `dp2 tp8`" + +#: ../../source/tutorials/models/Kimi-K2.5.md:798 +msgid "" +"Low-latency: For short sequences with low latency: we recommend setting " +"`dp2 tp8`" +msgstr "低延迟:对于需要低延迟的短序列:我们推荐设置 `dp2 tp8`" + +#: ../../source/tutorials/models/Kimi-K2.5.md:799 +msgid "" +"High-throughput: For short sequences with high throughput: we also " +"recommend setting `dp4 tp4`" +msgstr "高吞吐量:对于需要高吞吐量的短序列:我们也推荐设置 `dp4 tp4`" + +#: ../../source/tutorials/models/Kimi-K2.5.md:801 +msgid "" +"**Notice:** `max-model-len` and `max-num-seqs` need to be set according " +"to the actual usage scenario. For other settings, please refer to the " +"**[Deployment](#deployment)** chapter." +msgstr "**注意:** `max-model-len` 和 `max-num-seqs` 需要根据实际使用场景进行设置。其他设置请参考 **[部署](#deployment)** 章节。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:804 +msgid "FAQ" +msgstr "常见问题" + +#: ../../source/tutorials/models/Kimi-K2.5.md:806 +msgid "**Q: Why is the TPOT performance poor in Long-context test?**" +msgstr "**问:为什么在长上下文测试中 TPOT 性能不佳?**" + +#: ../../source/tutorials/models/Kimi-K2.5.md:808 +msgid "" +"A: Please ensure that the FIA operator replacement script has been " +"executed successfully to complete the replacement of FIA operators. Here " +"is the script: " +"[A2](../../../../tools/install_flash_infer_attention_score_ops_a2.sh) and" +" [A3](../../../../tools/install_flash_infer_attention_score_ops_a3.sh)" +msgstr "答:请确保已成功执行 FIA 算子替换脚本以完成 FIA 算子的替换。脚本如下:[A2](../../../../tools/install_flash_infer_attention_score_ops_a2.sh) 和 [A3](../../../../tools/install_flash_infer_attention_score_ops_a3.sh)" + +#: ../../source/tutorials/models/Kimi-K2.5.md:810 +msgid "" +"**Q: Startup fails with HCCL port conflicts (address already bound). What" +" should I do?**" +msgstr "**问:启动失败,提示 HCCL 端口冲突(地址已被占用)。我该怎么办?**" + +#: ../../source/tutorials/models/Kimi-K2.5.md:812 +msgid "A: Clean up old processes and restart: `pkill -f VLLM*`." +msgstr "答:清理旧进程并重启:`pkill -f VLLM*`。" + +#: ../../source/tutorials/models/Kimi-K2.5.md:814 +msgid "**Q: How to handle OOM or unstable startup?**" +msgstr "**问:如何处理 OOM 或启动不稳定的问题?**" + +#: ../../source/tutorials/models/Kimi-K2.5.md:816 +msgid "" +"A: Reduce `--max-num-seqs` and `--max-model-len` first. If needed, reduce" +" concurrency and load-testing pressure (e.g., `max-concurrency` / `num-" +"prompts`)." +msgstr "答:首先减少 `--max-num-seqs` 和 `--max-model-len`。如有需要,降低并发度和压测压力(例如,`max-concurrency` / `num-prompts`)。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/MiniMax-M2.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/MiniMax-M2.po new file mode 100644 index 00000000..806dfba6 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/MiniMax-M2.po @@ -0,0 +1,574 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/MiniMax-M2.md:1 +msgid "MiniMax-M2" +msgstr "MiniMax-M2" + +#: ../../source/tutorials/models/MiniMax-M2.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/MiniMax-M2.md:5 +msgid "" +"MiniMax‑M2.5 is MiniMax’s flagship large language model, reinforced for " +"high‑value scenarios such as code generation, agentic tool " +"calling/search, and complex office workflows, with an emphasis on " +"reasoning efficiency and end‑to‑end speed on challenging tasks." +msgstr "" +"MiniMax‑M2.5 是 MiniMax 的旗舰大语言模型,针对代码生成、智能体工具调用/搜索以及复杂办公工作流等高价值场景进行了强化,重点在于推理效率和在挑战性任务上的端到端速度。" + +#: ../../source/tutorials/models/MiniMax-M2.md:7 +msgid "" +"MiniMax-M2.7 is MiniMax's first model deeply participating in its own " +"evolution. M2.7 is capable of building complex agent harnesses and " +"completing highly elaborate productivity tasks, leveraging Agent Teams, " +"complex Skills, and dynamic tool search." +msgstr "" +"MiniMax-M2.7 是 MiniMax 首个深度参与自身演进的模型。M2.7 能够构建复杂的智能体框架并完成高度精细的生产力任务,利用智能体团队、复杂技能和动态工具搜索。" + +#: ../../source/tutorials/models/MiniMax-M2.md:9 +msgid "" +"This document provides a unified deployment guide for `MiniMax-M2.5` and " +"`MiniMax-M2.7` on vLLM Ascend, covering both:" +msgstr "本文档提供了在 vLLM Ascend 上部署 `MiniMax-M2.5` 和 `MiniMax-M2.7` 的统一指南,涵盖以下两种部署方式:" + +#: ../../source/tutorials/models/MiniMax-M2.md:11 +msgid "**A3 single-node** deployment (Atlas 800 A3)" +msgstr "**A3 单节点**部署(Atlas 800 A3)" + +#: ../../source/tutorials/models/MiniMax-M2.md:12 +msgid "**A2 dual-node** deployment (2× Atlas 800I A2)" +msgstr "**A2 双节点**部署(2× Atlas 800I A2)" + +#: ../../source/tutorials/models/MiniMax-M2.md:14 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/MiniMax-M2.md:16 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/MiniMax-M2.md:18 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/MiniMax-M2.md:20 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/MiniMax-M2.md:22 +msgid "Model Weights" +msgstr "模型权重" + +#: ../../source/tutorials/models/MiniMax-M2.md:24 +msgid "" +"`MiniMax-M2.5` (fp8 checkpoint): recommended to use **1× Atlas 800 A3** " +"or **2× Atlas 800I A2** nodes. Download the model weights from " +"[MiniMax/MiniMax-M2.5](https://modelscope.cn/models/MiniMax/MiniMax-M2.5)." 
+msgstr "" +"`MiniMax-M2.5`(fp8 检查点):推荐使用 **1× Atlas 800 A3** 或 **2× Atlas 800I A2** 节点。从 " +"[MiniMax/MiniMax-M2.5](https://modelscope.cn/models/MiniMax/MiniMax-M2.5) 下载模型权重。" + +#: ../../source/tutorials/models/MiniMax-M2.md:25 +msgid "" +"`MiniMax-M2.5-w8a8-QuaRot` : Download the model weights from [Eco-" +"Tech/MiniMax-M2.5-w8a8-QuaRot](https://modelscope.cn/models/Eco-" +"Tech/MiniMax-M2.5-w8a8-QuaRot)." +msgstr "" +"`MiniMax-M2.5-w8a8-QuaRot`:从 [Eco-Tech/MiniMax-M2.5-w8a8-" +"QuaRot](https://modelscope.cn/models/Eco-Tech/MiniMax-M2.5-w8a8-QuaRot) 下载模型权重。" + +#: ../../source/tutorials/models/MiniMax-M2.md:26 +msgid "" +"`Eagle3` : Download the model weights from [vllm-ascend/MiniMax-M2.5" +"-eagel-model](https://modelscope.cn/models/vllm-ascend/MiniMax-M2.5" +"-eagel-model-0318)." +msgstr "" +"`Eagle3`:从 [vllm-ascend/MiniMax-M2.5-eagel-" +"model](https://modelscope.cn/models/vllm-ascend/MiniMax-M2.5-eagel-model-0318) 下载模型权重。" + +#: ../../source/tutorials/models/MiniMax-M2.md:27 +msgid "" +"`MiniMax-M2.7` (fp8 checkpoint): recommended to use **1× Atlas 800 A3** " +"or **2× Atlas 800I A2** nodes. Download the model weights from " +"[MiniMax/MiniMax-M2.7](https://modelscope.cn/models/MiniMax/MiniMax-M2.7)." +msgstr "" +"`MiniMax-M2.7`(fp8 检查点):推荐使用 **1× Atlas 800 A3** 或 **2× Atlas 800I A2** 节点。从 " +"[MiniMax/MiniMax-M2.7](https://modelscope.cn/models/MiniMax/MiniMax-M2.7) 下载模型权重。" + +#: ../../source/tutorials/models/MiniMax-M2.md:28 +msgid "" +"`MiniMax-M2.7-w8a8-QuaRot` : Download the model weights from [Eco-" +"Tech/MiniMax-M2.7-w8a8-QuaRot](https://modelscope.cn/models/Eco-" +"Tech/MiniMax-M2.7-w8a8-QuaRot)." +msgstr "" +"`MiniMax-M2.7-w8a8-QuaRot`:从 [Eco-Tech/MiniMax-M2.7-w8a8-" +"QuaRot](https://modelscope.cn/models/Eco-Tech/MiniMax-M2.7-w8a8-QuaRot) 下载模型权重。" + +#: ../../source/tutorials/models/MiniMax-M2.md:30 +msgid "" +"It is recommended to download the model weights to a shared directory, " +"such as `/mnt/sfs_turbo/.cache/`. The current release automatically " +"detects the MiniMax-M2 fp8 checkpoint, disables fp8 quantization kernels " +"on NPU, and loads the weights by dequantizing to bf16. This behavior may " +"be removed once public bf16 weights are available." +msgstr "" +"建议将模型权重下载到共享目录,例如 `/mnt/sfs_turbo/.cache/`。当前版本会自动检测 MiniMax-M2 的 fp8 检查点,在 NPU 上禁用 fp8 " +"量化内核,并通过反量化为 bf16 来加载权重。一旦公开的 bf16 权重可用,此行为可能会被移除。" + +#: ../../source/tutorials/models/MiniMax-M2.md:32 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/MiniMax-M2.md:34 +msgid "You can use the official docker image to run `MiniMax-M2.5/M2.7` directly." +msgstr "您可以使用官方的 docker 镜像直接运行 `MiniMax-M2.5/M2.7`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:36 +msgid "" +"Select an image based on your machine type and start the container on " +"your node. See [using docker](../../installation.md#set-up-using-docker)." +msgstr "根据您的机器类型选择镜像,并在您的节点上启动容器。请参阅[使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/MiniMax-M2.md:38 +msgid "Run with Docker" +msgstr "使用 Docker 运行" + +#: ../../source/tutorials/models/MiniMax-M2.md:40 +#: ../../source/tutorials/models/MiniMax-M2.md:129 +#: ../../source/tutorials/models/MiniMax-M2.md:332 +msgid "A3 (single node)" +msgstr "A3(单节点)" + +#: ../../source/tutorials/models/MiniMax-M2.md:83 +msgid "A2 (dual node, run on both nodes)" +msgstr "A2(双节点,在两个节点上运行)" + +#: ../../source/tutorials/models/MiniMax-M2.md:85 +msgid "Create and run `minimax25-docker-run.sh` on **both** A2 nodes." 
+msgstr "在**两个** A2 节点上创建并运行 `minimax25-docker-run.sh`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:87 +#: ../../source/tutorials/models/MiniMax-M2.md:133 +msgid "Notes:" +msgstr "注意:" + +#: ../../source/tutorials/models/MiniMax-M2.md:89 +msgid "" +"The default configuration assumes an **Atlas 800I A2 8-NPU** node and " +"sets `ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7`. Update it based on your" +" hardware." +msgstr "默认配置假设为 **Atlas 800I A2 8-NPU** 节点,并设置 `ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7`。请根据您的硬件进行更新。" + +#: ../../source/tutorials/models/MiniMax-M2.md:90 +msgid "" +"Map your model weight directory into the container (the example maps it " +"to `/opt/data/verification/`)." +msgstr "将您的模型权重目录映射到容器中(示例中映射到 `/opt/data/verification/`)。" + +#: ../../source/tutorials/models/MiniMax-M2.md:125 +msgid "Online Inference on Multi-NPU" +msgstr "多 NPU 在线推理" + +#: ../../source/tutorials/models/MiniMax-M2.md:127 +msgid "" +"Below are recommended startup configurations for `MiniMax-M2.5`. Users " +"can simply change weights and model name to run this startup " +"configuration on `MiniMax-M2.7`. However it may not yet the best matchup " +"for `MiniMax-M2.7` if one is trying to reach the best performance." +msgstr "以下是 `MiniMax-M2.5` 的推荐启动配置。用户可以简单地更改权重和模型名称,即可在 `MiniMax-M2.7` 上运行此启动配置。但是,如果试图达到最佳性能,这可能还不是 `MiniMax-M2.7` 的最佳匹配。" + +#: ../../source/tutorials/models/MiniMax-M2.md:131 +msgid "" +"Below is a recommended startup configuration for short-context condition " +"like 3.5k/1.5k on `MiniMax-M2.5` to reach a good performance. If you wish" +" to run on long-context case, you may follow `Remarks` below to change " +"your config." +msgstr "以下是为 `MiniMax-M2.5` 在短上下文(如 3.5k/1.5k)条件下达到良好性能的推荐启动配置。如果您希望在长上下文情况下运行,可以按照下面的`备注`来更改配置。" + +#: ../../source/tutorials/models/MiniMax-M2.md:135 +msgid "" +"If you only care about short-context low latency, you can explicitly set " +"`--max-model-len 32768`. You may also set `tensor-parallel-size` to 16 " +"and set `data-parallel-size` to 1." +msgstr "如果您只关心短上下文的低延迟,可以显式设置 `--max-model-len 32768`。您也可以将 `tensor-parallel-size` 设置为 16,并将 `data-parallel-size` 设置为 1。" + +#: ../../source/tutorials/models/MiniMax-M2.md:136 +msgid "" +"`export VLLM_ASCEND_BALANCE_SCHEDULING=1` is used to enhance scheduling " +"capacity between prefill and decode. This will work remarkably with a " +"lager `data-parallel-size`. This can increace performance when " +"cuncurrency gets closer to values equals to `data-parallel-size` times " +"`max-num-seqs`." +msgstr "" +"`export VLLM_ASCEND_BALANCE_SCHEDULING=1` 用于增强预填充和解码之间的调度能力。这在 `data-parallel-size` " +"较大时效果显著。当并发数接近 `data-parallel-size` 乘以 `max-num-seqs` 的值时,这可以提高性能。" + +#: ../../source/tutorials/models/MiniMax-M2.md:137 +msgid "" +"Running the current Eagle3 weights for `MiniMax-M2.7` yields no " +"performance improvement; it is recommended to remove the " +"`--speculative_config`." +msgstr "为 `MiniMax-M2.7` 运行当前的 Eagle3 权重不会带来性能提升;建议移除 `--speculative_config`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:174 +msgid "Remarks:" +msgstr "备注:" + +#: ../../source/tutorials/models/MiniMax-M2.md:176 +msgid "`minimax_m2_append_think` keeps `...` inside `content`." +msgstr "`minimax_m2_append_think` 会将 `...` 保留在 `content` 内部。" + +#: ../../source/tutorials/models/MiniMax-M2.md:177 +msgid "" +"If you mainly rely on the reasoning semantics of `/v1/responses`, it is " +"recommended to use `--reasoning-parser minimax_m2` instead." 
+msgstr "如果您主要依赖 `/v1/responses` 的推理语义,建议改用 `--reasoning-parser minimax_m2`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:178 +msgid "" +"To receive a better performance on long-context like 128k or 64k, we " +"recommend to do changes as shown below, and you can remove `export " +"VLLM_ASCEND_BALANCE_SCHEDULING=1`." +msgstr "为了在 128k 或 64k 等长上下文上获得更好的性能,我们建议进行如下更改,并且您可以移除 `export VLLM_ASCEND_BALANCE_SCHEDULING=1`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:193 +msgid "" +"If you will to test with `curl` command, you can add following commands " +"addition to start up command above." +msgstr "如果您想使用 `curl` 命令进行测试,可以在上述启动命令的基础上添加以下命令。" + +#: ../../source/tutorials/models/MiniMax-M2.md:201 +msgid "A2 (dual node, tp=8 + dp=2)" +msgstr "A2(双节点,tp=8 + dp=2)" + +#: ../../source/tutorials/models/MiniMax-M2.md:203 +msgid "" +"Since cross-node tensor parallelism (TP) can be unstable, the dual-node " +"guide uses a **tp=8 + dp=2** setup (8 NPUs per node, 16 NPUs total)." +msgstr "由于跨节点的张量并行(TP)可能不稳定,双节点指南采用 **tp=8 + dp=2** 的设置(每个节点 8 个 NPU,总共 16 个 NPU)。" + +#: ../../source/tutorials/models/MiniMax-M2.md:205 +msgid "Node0 (primary) startup script" +msgstr "Node0(主节点)启动脚本" + +#: ../../source/tutorials/models/MiniMax-M2.md:207 +msgid "" +"Edit `minimax25_service_node0.sh` inside the node0 container, and replace" +" the placeholders with your actual values:" +msgstr "在 node0 容器内编辑 `minimax25_service_node0.sh`,并将占位符替换为您的实际值:" + +#: ../../source/tutorials/models/MiniMax-M2.md:209 +#, python-brace-format +msgid "`{PrimaryNodeIP}`: the primary node's IP address (public/cluster network)" +msgstr "`{PrimaryNodeIP}`:主节点的 IP 地址(公共/集群网络)" + +#: ../../source/tutorials/models/MiniMax-M2.md:210 +#, python-brace-format +msgid "" +"`{NIC}`: the NIC name for the public/cluster network (check via " +"`ifconfig`, e.g., `enp67s0f0np0`)" +msgstr "`{NIC}`:公共/集群网络的网卡名称(通过 `ifconfig` 检查,例如 `enp67s0f0np0`)" + +#: ../../source/tutorials/models/MiniMax-M2.md:211 +msgid "`VLLM_TORCH_PROFILER_DIR`: optional, directory to store profiling outputs" +msgstr "`VLLM_TORCH_PROFILER_DIR`:可选,用于存储性能分析输出的目录" + +#: ../../source/tutorials/models/MiniMax-M2.md:260 +msgid "Node1 (secondary) startup script" +msgstr "Node1(从节点)启动脚本" + +#: ../../source/tutorials/models/MiniMax-M2.md:262 +msgid "Edit `minimax25_service_node1.sh` inside the node1 container:" +msgstr "在 node1 容器内编辑 `minimax25_service_node1.sh`:" + +#: ../../source/tutorials/models/MiniMax-M2.md:264 +#, python-brace-format +msgid "`{SecondaryNodeIP}`: the secondary node's IP address" +msgstr "`{SecondaryNodeIP}`:从节点的 IP 地址" + +#: ../../source/tutorials/models/MiniMax-M2.md:265 +#, python-brace-format +msgid "`{PrimaryNodeIP}`: the primary node's IP address (same as node0)" +msgstr "`{PrimaryNodeIP}`:主节点的 IP 地址(与 node0 相同)" + +#: ../../source/tutorials/models/MiniMax-M2.md:266 +#, python-brace-format +msgid "`{NIC}`: same as above" +msgstr "`{NIC}`:同上" + +#: ../../source/tutorials/models/MiniMax-M2.md:316 +msgid "Startup order" +msgstr "启动顺序" + +#: ../../source/tutorials/models/MiniMax-M2.md:318 +msgid "Start the service on both nodes:" +msgstr "在两个节点上启动服务:" + +#: ../../source/tutorials/models/MiniMax-M2.md:328 +msgid "After node0 prints `service start` in logs, you can verify the service." 
+msgstr "在 node0 的日志中打印出 `service start` 后,您可以验证服务。" + +#: ../../source/tutorials/models/MiniMax-M2.md:330 +msgid "Verify the Service" +msgstr "验证服务" + +#: ../../source/tutorials/models/MiniMax-M2.md:334 +msgid "Test with an OpenAI-compatible client:" +msgstr "使用 OpenAI 兼容的客户端进行测试:" + +#: ../../source/tutorials/models/MiniMax-M2.md:349 +msgid "Or send a request using curl:" +msgstr "或者使用 curl 发送请求:" + +#: ../../source/tutorials/models/MiniMax-M2.md:378 +msgid "A2 (dual node)" +msgstr "A2(双节点)" + +#: ../../source/tutorials/models/MiniMax-M2.md:380 +#, python-brace-format +msgid "" +"Run the following from any machine that can reach the primary node " +"(replace `{PrimaryNodeIP}` with the real IP):" +msgstr "从任何可以访问主节点的机器上运行以下命令(将 `{PrimaryNodeIP}` 替换为真实 IP):" + +#: ../../source/tutorials/models/MiniMax-M2.md:396 +msgid "Performance Reference (`MiniMax-M2.5`)" +msgstr "性能参考(`MiniMax-M2.5`)" + +#: ../../source/tutorials/models/MiniMax-M2.md:398 +msgid "A3 (single node, tp=16, 4k/1k@bs16)" +msgstr "A3(单节点,tp=16,4k/1k@bs16)" + +#: ../../source/tutorials/models/MiniMax-M2.md:400 +#: ../../source/tutorials/models/MiniMax-M2.md:446 +msgid "Results" +msgstr "结果" + +#: ../../source/tutorials/models/MiniMax-M2.md:402 +msgid "**Baseline** (`3.5k/1k@bs=217`)" +msgstr "**基线**(`3.5k/1k@bs=217`)" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "Metric" +msgstr "指标" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "Result" +msgstr "结果" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "Success/Failure" +msgstr "成功/失败" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "`217/0`" +msgstr "`217/0`" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "Mean TTFT" +msgstr "平均TTFT" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "`10316.56 ms`" +msgstr "`10316.56 毫秒`" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "Mean TPOT" +msgstr "平均TPOT" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "`34.28 ms`" +msgstr "`34.28 毫秒`" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "Output tok/s" +msgstr "输出令牌/秒" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "`4803.81`" +msgstr "`4803.81`" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "Total tok/s" +msgstr "总令牌/秒" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "`16096.59`" +msgstr "`16096.59`" + +#: ../../source/tutorials/models/MiniMax-M2.md:412 +msgid "**Long-context reference** (`190k/1k@bs=4`)" +msgstr "**长上下文参考** (`190k/1k@bs=4`)" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "`37.12`" +msgstr "`37.12`" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "`2002.37 ms`" +msgstr "`2002.37 毫秒`" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "`105.54 ms`" +msgstr "`105.54 毫秒`" + +#: ../../source/tutorials/models/MiniMax-M2.md:382 +msgid "Mean ITL" +msgstr "平均ITL" + +#: ../../source/tutorials/models/MiniMax-M2.md:421 +msgid "A2 (dual node, 190k/1k, concurrency=4, 16 prompts)" +msgstr "A2 (双节点,190k/1k,并发数=4,16个提示词)" + +#: ../../source/tutorials/models/MiniMax-M2.md:423 +msgid "Benchmark method" +msgstr "基准测试方法" + +#: ../../source/tutorials/models/MiniMax-M2.md:425 +msgid "Use vLLM bench for the **190k/1k, concurrency=4, 16 prompts** scenario:" +msgstr "使用 vLLM bench 进行 **190k/1k,并发数=4,16个提示词** 场景的测试:" + +#: ../../source/tutorials/models/MiniMax-M2.md:448 +msgid "**190k/1k, concurrency=4, 
16 prompts**" +msgstr "**190k/1k,并发数=4,16个提示词**" + +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "TTFT (avg)" +msgstr "TTFT (平均)" + +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "3305.25 ms" +msgstr "3305.25 毫秒" + +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "TPOT (avg)" +msgstr "TPOT (平均)" + +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "109.83 ms" +msgstr "109.83 毫秒" + +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "Output throughput" +msgstr "输出吞吐量" + +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "35.29 tok/s" +msgstr "35.29 令牌/秒" + +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "Prefix hit rate" +msgstr "前缀命中率" + +#: ../../source/tutorials/models/MiniMax-M2.md:427 +msgid "85%" +msgstr "85%" + +#: ../../source/tutorials/models/MiniMax-M2.md:457 +msgid "FAQ" +msgstr "常见问题" + +#: ../../source/tutorials/models/MiniMax-M2.md:459 +msgid "**Q: What should I do if the output is garbled in EP mode?**" +msgstr "**问:在 EP 模式下输出乱码怎么办?**" + +#: ../../source/tutorials/models/MiniMax-M2.md:461 +msgid "" +"A: It is recommended to keep `--enable-expert-parallel` and " +"`VLLM_ASCEND_ENABLE_FLASHCOMM1=1`." +msgstr "答:建议保持启用 `--enable-expert-parallel` 并设置 `VLLM_ASCEND_ENABLE_FLASHCOMM1=1`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:463 +msgid "" +"**Q: Why is the `reasoning` field often empty after using " +"`minimax_m2_append_think`?**" +msgstr "**问:为什么使用 `minimax_m2_append_think` 后 `reasoning` 字段经常为空?**" + +#: ../../source/tutorials/models/MiniMax-M2.md:465 +msgid "" +"A: This is expected. The parser keeps `...` inside " +"`content`. If you mainly rely on the reasoning semantics of " +"`/v1/responses`, use `--reasoning-parser minimax_m2` instead." +msgstr "答:这是预期行为。解析器会将 `...` 保留在 `content` 字段内。如果您主要依赖 `/v1/responses` 的推理语义,请改用 `--reasoning-parser minimax_m2`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:467 +msgid "" +"**Q: Startup fails with HCCL port conflicts (address already bound). What" +" should I do?**" +msgstr "**问:启动失败,提示 HCCL 端口冲突(地址已被占用)。该怎么办?**" + +#: ../../source/tutorials/models/MiniMax-M2.md:469 +msgid "" +"A: Clean up old processes and restart: `pkill -f \"vllm serve " +"/models/MiniMax-M2.5\"`." +msgstr "答:清理旧进程并重启:`pkill -f \"vllm serve /models/MiniMax-M2.5\"`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:471 +msgid "**Q: How to handle OOM or unstable startup?**" +msgstr "**问:如何处理 OOM 或启动不稳定?**" + +#: ../../source/tutorials/models/MiniMax-M2.md:473 +msgid "" +"A: Reduce `--max-num-seqs` and `--max-num-batched-tokens` first. If " +"needed, reduce concurrency and load-testing pressure (e.g., `max-" +"concurrency` / `num-prompts`)." +msgstr "答:首先降低 `--max-num-seqs` 和 `--max-num-batched-tokens`。如有需要,降低并发数和负载测试压力(例如,`max-concurrency` / `num-prompts`)。" + +#: ../../source/tutorials/models/MiniMax-M2.md:475 +msgid "**Q: Why not use cross-node tp=16?**" +msgstr "**问:为什么不使用跨节点 tp=16?**" + +#: ../../source/tutorials/models/MiniMax-M2.md:477 +msgid "" +"A: The referenced practice noted that cross-node TP may be unstable, so " +"`tp=8, dp=2` is recommended for dual-node deployment." +msgstr "答:参考实践指出跨节点 TP 可能不稳定,因此对于双节点部署,推荐使用 `tp=8, dp=2`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:479 +msgid "**Q: How should I choose `--reasoning-parser`?**" +msgstr "**问:应该如何选择 `--reasoning-parser`?**" + +#: ../../source/tutorials/models/MiniMax-M2.md:481 +msgid "" +"A: This guide uses `minimax_m2_append_think` so that `...`" +" is kept in `content`. 
If you mainly rely on the reasoning semantics of " +"`/v1/responses`, consider using `--reasoning-parser minimax_m2`." +msgstr "答:本指南使用 `minimax_m2_append_think`,以便将 `...` 保留在 `content` 中。如果您主要依赖 `/v1/responses` 的推理语义,请考虑使用 `--reasoning-parser minimax_m2`。" + +#: ../../source/tutorials/models/MiniMax-M2.md:483 +msgid "**Q: Which ports must be accessible?**" +msgstr "**问:哪些端口必须可访问?**" + +#: ../../source/tutorials/models/MiniMax-M2.md:485 +msgid "" +"A: At minimum, expose the serving port (e.g., `20004`) and the data-" +"parallel RPC port (e.g., `2347`), and ensure the two nodes can reach each" +" other over the network." +msgstr "答:至少需要暴露服务端口(例如 `20004`)和数据并行 RPC 端口(例如 `2347`),并确保两个节点可以通过网络互相访问。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/PaddleOCR-VL.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/PaddleOCR-VL.po new file mode 100644 index 00000000..f31c674b --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/PaddleOCR-VL.po @@ -0,0 +1,265 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:1 +msgid "PaddleOCR-VL" +msgstr "PaddleOCR-VL" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:5 +msgid "" +"PaddleOCR-VL is a SOTA and resource-efficient model tailored for document" +" parsing. Its core component is PaddleOCR-VL-0.9B, a compact yet powerful" +" vision-language model (VLM) that integrates a NaViT-style dynamic " +"resolution visual encoder with the ERNIE-4.5-0.3B language model to " +"enable accurate element recognition." +msgstr "" +"PaddleOCR-VL 是一款专为文档解析设计的 SOTA 且资源高效的模型。其核心组件是 PaddleOCR-VL-0.9B,一个紧凑而强大的视觉语言模型(VLM),它集成了 NaViT 风格的动态分辨率视觉编码器和 ERNIE-4.5-0.3B 语言模型,以实现精确的元素识别。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:7 +msgid "" +"This document provides a detailed workflow for the complete deployment " +"and verification of the model, including supported features, environment " +"preparation, single-node deployment, and functional verification. It is " +"designed to help users quickly complete model deployment and " +"verification." +msgstr "" +"本文档提供了完整的模型部署和验证的详细工作流程,包括支持的特性、环境准备、单节点部署和功能验证。旨在帮助用户快速完成模型部署和验证。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:9 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:11 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "" +"请参考[支持的特性](../../user_guide/support_matrix/supported_models.md)以获取模型支持的特性矩阵。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:13 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." 
+msgstr "请参考[特性指南](../../user_guide/feature_guide/index.md)以获取特性的配置。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:15 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:17 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:19 +msgid "" +"`PaddleOCR-VL-0.9B`: [PaddleOCR-" +"VL-0.9B](https://www.modelscope.cn/models/PaddlePaddle/PaddleOCR-VL)" +msgstr "" +"`PaddleOCR-VL-0.9B`: [PaddleOCR-VL-0.9B](https://www.modelscope.cn/models/PaddlePaddle/PaddleOCR-VL)" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:21 +msgid "" +"It is recommended to download the model weights to a local directory " +"(e.g., `./PaddleOCR-VL`) for quick access during deployment." +msgstr "建议将模型权重下载到本地目录(例如 `./PaddleOCR-VL`),以便在部署期间快速访问。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:23 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:25 +msgid "You can use our official docker image to run `PaddleOCR-VL` directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 `PaddleOCR-VL`。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:27 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像并在节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:51 +msgid "" +"The 310P device is supported from version 0.15.0rc1. You need to select " +"the corresponding image for installation." +msgstr "310P 设备从版本 0.15.0rc1 开始支持。您需要选择对应的镜像进行安装。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:54 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:56 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:58 +msgid "Single NPU (PaddleOCR-VL)" +msgstr "单 NPU (PaddleOCR-VL)" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:60 +msgid "" +"PaddleOCR-VL supports single-node single-card deployment on the 910B4 and" +" 310P platform. Follow these steps to start the inference service:" +msgstr "PaddleOCR-VL 支持在 910B4 和 310P 平台上进行单节点单卡部署。请按照以下步骤启动推理服务:" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:62 +msgid "" +"Prepare model weights: Ensure the downloaded model weights are stored in " +"the `PaddleOCR-VL` directory." +msgstr "准备模型权重:确保下载的模型权重存储在 `PaddleOCR-VL` 目录中。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:63 +msgid "Create and execute the deployment script (save as `deploy.sh`):" +msgstr "创建并执行部署脚本(保存为 `deploy.sh`):" + +#: ../../source/tutorials/models/PaddleOCR-VL.md +msgid "910B4" +msgstr "910B4" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:72 +msgid "Run the following script to start the vLLM server on single 910B4:" +msgstr "运行以下脚本在单张 910B4 上启动 vLLM 服务器:" + +#: ../../source/tutorials/models/PaddleOCR-VL.md +msgid "310P" +msgstr "310P" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:97 +msgid "Run the following script to start the vLLM server on single 310P:" +msgstr "运行以下脚本在单张 310P 上启动 vLLM 服务器:" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:116 +msgid "" +"The `--max_model_len` option is added to prevent errors when generating " +"the attention operator mask on the 310P device." 
+msgstr "添加 `--max_model_len` 选项是为了防止在 310P 设备上生成注意力算子掩码时出错。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:121 +msgid "Multiple NPU (PaddleOCR-VL)" +msgstr "多 NPU (PaddleOCR-VL)" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:123 +msgid "Single-node deployment is recommended." +msgstr "推荐单节点部署。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:125 +msgid "Prefill-Decode Disaggregation" +msgstr "Prefill-Decode 解耦" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:127 +msgid "Not supported yet." +msgstr "暂不支持。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:129 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:131 +msgid "If your service start successfully, you can see the info shown below:" +msgstr "如果您的服务启动成功,您将看到如下信息:" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:139 +msgid "" +"Once your server is started, you can use the OpenAI API client to make " +"queries." +msgstr "服务器启动后,您可以使用 OpenAI API 客户端进行查询。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:184 +msgid "" +"If you query the server successfully, you can see the info shown below " +"(client):" +msgstr "如果您成功查询服务器,您将看到如下信息(客户端):" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:200 +msgid "Offline Inference with vLLM and PP-DocLayoutV2" +msgstr "使用 vLLM 和 PP-DocLayoutV2 进行离线推理" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:202 +msgid "" +"In the above example, we demonstrated how to use vLLM to infer the " +"PaddleOCR-VL-0.9B model. Typically, we also need to integrate the PP-" +"DocLayoutV2 model to fully unleash the capabilities of the PaddleOCR-VL " +"model, making it more consistent with the examples provided by the " +"official PaddlePaddle documentation." +msgstr "在上面的示例中,我们演示了如何使用 vLLM 推理 PaddleOCR-VL-0.9B 模型。通常,我们还需要集成 PP-DocLayoutV2 模型,以充分发挥 PaddleOCR-VL 模型的能力,使其更符合官方 PaddlePaddle 文档提供的示例。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:205 +msgid "" +"Use separate virtual environments for VLLM and PP-DocLayoutV2 to prevent " +"dependency conflicts." +msgstr "为 VLLM 和 PP-DocLayoutV2 使用独立的虚拟环境,以防止依赖冲突。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md +msgid "PaddlePaddle" +msgstr "PaddlePaddle" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:215 +msgid "The 910B4 device supports inference using the PaddlePaddle framework." +msgstr "910B4 设备支持使用 PaddlePaddle 框架进行推理。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:217 +msgid "Pull the PaddlePaddle-compatible CANN image" +msgstr "拉取兼容 PaddlePaddle 的 CANN 镜像" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:223 +msgid "Start the container using the following command:" +msgstr "使用以下命令启动容器:" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:235 +msgid "" +"Install " +"[PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=undefined)" +" and [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)" +msgstr "" +"安装 [PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=undefined) 和 [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:246 +msgid "The OpenCV component may be missing:" +msgstr "可能缺少 OpenCV 组件:" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:253 +msgid "" +"CANN-8.0.0 does not support some versions of NumPy and OpenCV. It is " +"recommended to install the specified versions." 
+msgstr "CANN-8.0.0 不支持某些版本的 NumPy 和 OpenCV。建议安装指定版本。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md +msgid "OM inference" +msgstr "OM 推理" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:264 +msgid "" +"The 310P device supports only the OM model inference. For details about " +"the process, see the guide provided in " +"[ModelZoo](https://gitcode.com/Ascend/ModelZoo-" +"PyTorch/tree/master/ACL_PyTorch/built-in/ocr/PP-DocLayoutV2)." +msgstr "310P 设备仅支持 OM 模型推理。有关该过程的详细信息,请参阅 [ModelZoo](https://gitcode.com/Ascend/ModelZoo-PyTorch/tree/master/ACL_PyTorch/built-in/ocr/PP-DocLayoutV2) 中提供的指南。" + +#: ../../source/tutorials/models/PaddleOCR-VL.md:268 +msgid "" +"Using vLLM as the backend, combined with PP-DocLayoutV2 for offline " +"inference" +msgstr "使用 vLLM 作为后端,结合 PP-DocLayoutV2 进行离线推理" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen-VL-Dense.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen-VL-Dense.po new file mode 100644 index 00000000..1921ae8e --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen-VL-Dense.po @@ -0,0 +1,360 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:1 +msgid "Qwen-VL-Dense(Qwen2.5VL-3B/7B, Qwen3-VL-2B/4B/8B/32B)" +msgstr "Qwen-VL-Dense (Qwen2.5VL-3B/7B, Qwen3-VL-2B/4B/8B/32B)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:5 +msgid "" +"The Qwen-VL(Vision-Language)series from Alibaba Cloud comprises a family " +"of powerful Large Vision-Language Models (LVLMs) designed for " +"comprehensive multimodal understanding. They accept images, text, and " +"bounding boxes as input, and output text and detection boxes, enabling " +"advanced functions like image detection, multi-modal dialogue, and multi-" +"image reasoning." +msgstr "" +"阿里云的Qwen-VL(视觉-语言)系列是一组强大的大型视觉语言模型(LVLM),专为全面的多模态理解而设计。它们接受图像、文本和边界框作为输入,并输出文本和检测框,从而实现图像检测、多模态对话和多图像推理等高级功能。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, NPU deployment, accuracy and performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的功能、功能配置、环境准备、NPU部署、精度和性能评估。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:9 +msgid "" +"This tutorial uses the vLLM-Ascend `v0.11.0rc3-a3` version for " +"demonstration, showcasing the `Qwen3-VL-8B-Instruct` model as an example " +"for single NPU deployment and the `Qwen2.5-VL-32B-Instruct` model as an " +"example for multi-NPU deployment." 
+msgstr "本教程使用 vLLM-Ascend `v0.11.0rc3-a3` 版本进行演示,以 `Qwen3-VL-8B-Instruct` 模型为例展示单NPU部署,以 `Qwen2.5-VL-32B-Instruct` 模型为例展示多NPU部署。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:11 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:21 +msgid "require 1 Atlas 800I A2 (64G × 8) node or 1 Atlas 800 A3 (64G × 16) node:" +msgstr "需要 1 个 Atlas 800I A2 (64G × 8) 节点或 1 个 Atlas 800 A3 (64G × 16) 节点:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:23 +msgid "" +"`Qwen2.5-VL-3B-Instruct`: [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen2.5-VL-3B-Instruct)" +msgstr "`Qwen2.5-VL-3B-Instruct`: [下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-VL-3B-Instruct)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:24 +msgid "" +"`Qwen2.5-VL-7B-Instruct`: [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen2.5-VL-7B-Instruct)" +msgstr "`Qwen2.5-VL-7B-Instruct`: [下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-VL-7B-Instruct)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:25 +msgid "" +"`Qwen2.5-VL-32B-Instruct`:[Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen2.5-VL-32B-Instruct)" +msgstr "`Qwen2.5-VL-32B-Instruct`:[下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-VL-32B-Instruct)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:26 +msgid "" +"`Qwen2.5-VL-72B-Instruct`:[Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen2.5-VL-72B-Instruct)" +msgstr "`Qwen2.5-VL-72B-Instruct`:[下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-VL-72B-Instruct)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:27 +msgid "" +"`Qwen3-VL-2B-Instruct`: [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen3-VL-2B-Instruct)" +msgstr "`Qwen3-VL-2B-Instruct`: [下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-VL-2B-Instruct)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:28 +msgid "" +"`Qwen3-VL-4B-Instruct`: [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen3-VL-4B-Instruct)" +msgstr "`Qwen3-VL-4B-Instruct`: [下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-VL-4B-Instruct)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:29 +msgid "" +"`Qwen3-VL-8B-Instruct`: [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen3-VL-8B-Instruct)" +msgstr "`Qwen3-VL-8B-Instruct`: [下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-VL-8B-Instruct)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:30 +msgid "" +"`Qwen3-VL-32B-Instruct`: [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen3-VL-32B-Instruct)" +msgstr "`Qwen3-VL-32B-Instruct`: [下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-VL-32B-Instruct)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:32 +msgid "" +"A sample Qwen2.5-VL quantization script can be found in 
the modelslim " +"code repository. [Qwen2.5-VL Quantization Script " +"Example](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/example/multimodal_vlm/Qwen2.5-VL/README.md)" +msgstr "可以在 modelslim 代码仓库中找到 Qwen2.5-VL 的量化脚本示例。[Qwen2.5-VL 量化脚本示例](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/example/multimodal_vlm/Qwen2.5-VL/README.md)" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:34 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:36 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md +msgid "single-NPU" +msgstr "单NPU" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:45 +#: ../../source/tutorials/models/Qwen-VL-Dense.md:73 +msgid "Run docker container:" +msgstr "运行 Docker 容器:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md +msgid "multi-NPU" +msgstr "多NPU" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:101 +msgid "Setup environment variables:" +msgstr "设置环境变量:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:112 +msgid "" +"`max_split_size_mb` prevents the native allocator from splitting blocks " +"larger than this size (in MB). This can reduce fragmentation and may " +"allow some borderline workloads to complete without running out of " +"memory. You can find more details " +"[here](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)." +msgstr "" +"`max_split_size_mb` 可防止原生分配器拆分大于此大小(以 MB 为单位)的内存块。这可以减少内存碎片,并可能使一些临界工作负载在内存耗尽前完成。您可以在" +"[此处](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)找到更多详细信息。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:115 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:117 +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md +msgid "Qwen3-VL-8B-Instruct" +msgstr "Qwen3-VL-8B-Instruct" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:126 +msgid "Run the following script to execute offline inference on single-NPU:" +msgstr "运行以下脚本在单NPU上执行离线推理:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:191 +#: ../../source/tutorials/models/Qwen-VL-Dense.md:287 +msgid "If you run this script successfully, you can see the info shown below:" +msgstr "如果脚本运行成功,您将看到如下信息:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md +msgid "Qwen2.5-VL-32B-Instruct" +msgstr "Qwen2.5-VL-32B-Instruct" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:221 +msgid "Run the following script to execute offline inference on multi-NPU:" +msgstr "运行以下脚本在多NPU上执行离线推理:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:312 +msgid "Online Serving" +msgstr "在线服务" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:321 +msgid "Run docker container to start the vLLM server on single-NPU:" +msgstr "运行 Docker 容器以在单NPU上启动 vLLM 服务器:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:332 +msgid "" +"Add `--max_model_len` option to avoid ValueError that the Qwen3-VL-8B-" +"Instruct model's max seq len (256000) is larger than the maximum number " +"of tokens that can be stored in KV cache. This will differ with different" +" NPU series based on the HBM size. Please modify the value according to a" +" suitable value for your NPU series." 
+msgstr "" +"添加 `--max_model_len` 选项以避免 ValueError,该错误提示 Qwen3-VL-8B-Instruct 模型的最大序列长度(256000)大于 KV 缓存可存储的最大令牌数。此值因不同 NPU 系列的 HBM 大小而异。请根据您 NPU 系列的合适值修改此值。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:335 +#: ../../source/tutorials/models/Qwen-VL-Dense.md:422 +msgid "If your service start successfully, you can see the info shown below:" +msgstr "如果服务启动成功,您将看到如下信息:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:343 +#: ../../source/tutorials/models/Qwen-VL-Dense.md:430 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:360 +#: ../../source/tutorials/models/Qwen-VL-Dense.md:447 +msgid "" +"If you query the server successfully, you can see the info shown below " +"(client):" +msgstr "如果成功查询服务器,您将看到如下信息(客户端):" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:366 +#: ../../source/tutorials/models/Qwen-VL-Dense.md:453 +msgid "Logs of the vllm server:" +msgstr "vllm 服务器的日志:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:381 +msgid "Run docker container to start the vLLM server on multi-NPU:" +msgstr "运行 Docker 容器以在多NPU上启动 vLLM 服务器:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:419 +msgid "" +"Add `--max_model_len` option to avoid ValueError that the Qwen2.5-VL-32B-" +"Instruct model's max_model_len (128000) is larger than the maximum number" +" of tokens that can be stored in KV cache. This will differ with " +"different NPU series base on the HBM size. Please modify the value " +"according to a suitable value for your NPU series." +msgstr "" +"添加 `--max_model_len` 选项以避免 ValueError,该错误提示 Qwen2.5-VL-32B-Instruct 模型的最大模型长度(128000)大于 KV 缓存可存储的最大令牌数。此值因不同 NPU 系列的 HBM 大小而异。请根据您 NPU 系列的合适值修改此值。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:468 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:470 +msgid "Using Language Model Evaluation Harness" +msgstr "使用 Language Model Evaluation Harness" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:472 +msgid "" +"The accuracy of some models is already within our CI monitoring scope, " +"including:" +msgstr "部分模型的精度已纳入我们的 CI 监控范围,包括:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:474 +msgid "`Qwen2.5-VL-7B-Instruct`" +msgstr "`Qwen2.5-VL-7B-Instruct`" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:475 +msgid "`Qwen3-VL-8B-Instruct`" +msgstr "`Qwen3-VL-8B-Instruct`" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:484 +msgid "" +"As an example, take the `mmmu_val` dataset as a test dataset, and run " +"accuracy evaluation of `Qwen3-VL-8B-Instruct` in offline mode." +msgstr "以 `mmmu_val` 数据集作为测试数据集为例,在离线模式下运行 `Qwen3-VL-8B-Instruct` 的精度评估。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:486 +#: ../../source/tutorials/models/Qwen-VL-Dense.md:517 +msgid "" +"Refer to [Using " +"lm_eval](../../developer_guide/evaluation/using_lm_eval.md) for more " +"details on `lm_eval` installation." +msgstr "有关 `lm_eval` 安装的更多详细信息,请参考[使用 lm_eval](../../developer_guide/evaluation/using_lm_eval.md)。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:492 +#: ../../source/tutorials/models/Qwen-VL-Dense.md:523 +msgid "Run `lm_eval` to execute the accuracy evaluation." +msgstr "运行 `lm_eval` 以执行精度评估。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:505 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3-VL-" +"8B-Instruct` in `vllm-ascend:0.11.0rc3` for reference only." 
+msgstr "执行后,您将获得结果。以下是 `vllm-ascend:0.11.0rc3` 中 `Qwen3-VL-8B-Instruct` 的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:515 +msgid "" +"As an example, take the `mmmu_val` dataset as a test dataset, and run " +"accuracy evaluation of `Qwen2.5-VL-32B-Instruct` in offline mode." +msgstr "以 `mmmu_val` 数据集作为测试数据集为例,在离线模式下运行 `Qwen2.5-VL-32B-Instruct` 的精度评估。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:535 +msgid "" +"After execution, you can get the result, here is the result of `Qwen2.5" +"-VL-32B-Instruct` in `vllm-ascend:0.11.0rc3` for reference only." +msgstr "执行后,您将获得结果。以下是 `vllm-ascend:0.11.0rc3` 中 `Qwen2.5-VL-32B-Instruct` 的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:543 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:545 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:547 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "更多详细信息,请参考 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:549 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:551 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`: 对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:552 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`: 对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:553 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`: 对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:555 +msgid "" +"The performance evaluation must be conducted in an online mode. Take the " +"`serve` as an example. Run the code as follows." +msgstr "性能评估必须在在线模式下进行。以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen-VL-Dense.md:578 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您将获得性能评估结果。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-7B.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-7B.po new file mode 100644 index 00000000..573d9754 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-7B.po @@ -0,0 +1,279 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:1 +msgid "Qwen2.5-7B" +msgstr "Qwen2.5-7B" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:5 +msgid "" +"Qwen2.5-7B-Instruct is the flagship instruction-tuned variant of Alibaba " +"Cloud’s Qwen 2.5 LLM series. 
It supports a maximum context window of " +"128K, enables generation of up to 8K tokens, and delivers enhanced " +"capabilities in multilingual processing, instruction following, " +"programming, mathematical computation, and structured data handling." +msgstr "" +"Qwen2.5-7B-Instruct 是阿里云 Qwen 2.5 大语言模型系列的旗舰指令调优变体。它支持最大 128K 的上下文窗口,能够生成最多 8K 个令牌,并在多语言处理、指令遵循、编程、数学计算和结构化数据处理方面提供增强的能力。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:7 +msgid "" +"This document details the complete deployment and verification workflow " +"for the model, including supported features, environment preparation, " +"single-node deployment, functional verification, accuracy and performance" +" evaluation, and troubleshooting of common issues. It is designed to help" +" users quickly complete model deployment and validation." +msgstr "" +"本文档详细介绍了该模型的完整部署和验证工作流程,包括支持的功能、环境准备、单节点部署、功能验证、准确性和性能评估以及常见问题排查。旨在帮助用户快速完成模型部署和验证。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:9 +msgid "The `Qwen2.5-7B-Instruct` model was supported since `vllm-ascend:v0.9.0`." +msgstr "`Qwen2.5-7B-Instruct` 模型自 `vllm-ascend:v0.9.0` 版本起获得支持。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:11 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:21 +msgid "" +"`Qwen2.5-7B-Instruct`(BF16 version): require 1 Atlas 910B4 (32G × 1) " +"card. [Download model weight](https://modelscope.cn/models/Qwen/Qwen2.5" +"-7B-Instruct)" +msgstr "" +"`Qwen2.5-7B-Instruct`(BF16 版本):需要 1 张 Atlas 910B4(32G × 1)卡。[下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-7B-Instruct)" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:23 +msgid "" +"It is recommended to download the model weights to a local directory " +"(e.g., `./Qwen2.5-7B-Instruct/`) for quick access during deployment." +msgstr "建议将模型权重下载到本地目录(例如 `./Qwen2.5-7B-Instruct/`),以便在部署期间快速访问。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:25 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:27 +msgid "" +"You can use our official docker image and install extra operator for " +"supporting `Qwen2.5-7B-Instruct`." +msgstr "您可以使用我们的官方 docker 镜像,并安装额外的算子以支持 `Qwen2.5-7B-Instruct`。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md +msgid "A3 series" +msgstr "A3 系列" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:36 +#: ../../source/tutorials/models/Qwen2.5-7B.md:64 +msgid "Start the docker image on your each node." 
+msgstr "在您的每个节点上启动 docker 镜像。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md +msgid "A2 series" +msgstr "A2 系列" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:90 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:92 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:94 +msgid "" +"Qwen2.5-7B-Instruct supports single-node single-card deployment on the " +"910B4 platform. Follow these steps to start the inference service:" +msgstr "Qwen2.5-7B-Instruct 支持在 910B4 平台上进行单节点单卡部署。请按照以下步骤启动推理服务:" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:96 +msgid "" +"Prepare model weights: Ensure the downloaded model weights are stored in " +"the `./Qwen2.5-7B-Instruct/` directory." +msgstr "准备模型权重:确保下载的模型权重存储在 `./Qwen2.5-7B-Instruct/` 目录中。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:97 +msgid "Create and execute the deployment script (save as `deploy.sh`):" +msgstr "创建并执行部署脚本(保存为 `deploy.sh`):" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:112 +msgid "Multi-node Deployment" +msgstr "多节点部署" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:114 +msgid "Single-node deployment is recommended." +msgstr "推荐使用单节点部署。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:116 +msgid "Prefill-Decode Disaggregation" +msgstr "预填充-解码分离" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:118 +msgid "Not supported yet." +msgstr "暂不支持。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:120 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:122 +msgid "After starting the service, verify functionality using a `curl` request:" +msgstr "启动服务后,使用 `curl` 请求验证功能:" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:135 +msgid "" +"A valid response (e.g., `\"Beijing is a vibrant and historic capital " +"city\"`) indicates successful deployment." +msgstr "有效的响应(例如 `\"Beijing is a vibrant and historic capital city\"`)表明部署成功。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:137 +msgid "Accuracy Evaluation" +msgstr "准确性评估" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:139 +#: ../../source/tutorials/models/Qwen2.5-7B.md:151 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:141 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:143 +msgid "" +"Results and logs are saved to `benchmark/outputs/default/`. 
A sample " +"accuracy report is shown below:" +msgstr "结果和日志保存在 `benchmark/outputs/default/` 中。示例如下:" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "gsm8k" +msgstr "gsm8k" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:66 +msgid "75.00" +msgstr "75.00" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:149 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:153 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参考[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:155 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:157 +msgid "Run performance evaluation of `Qwen2.5-7B-Instruct` as an example." +msgstr "以运行 `Qwen2.5-7B-Instruct` 的性能评估为例。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:159 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "更多详情请参考 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:161 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:163 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:164 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:165 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:167 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen2.5-7B.md:180 +msgid "After several minutes, you can get the performance evaluation result." +msgstr "几分钟后,您将获得性能评估结果。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po new file mode 100644 index 00000000..1e9fe06b --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po @@ -0,0 +1,301 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:1 +msgid "Qwen2.5-Omni-7B" +msgstr "Qwen2.5-Omni-7B" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:5 +msgid "" +"Qwen2.5-Omni is an end-to-end multimodal model designed to perceive " +"diverse modalities, including text, images, audio, and video, while " +"simultaneously generating text and natural speech responses in a " +"streaming manner." +msgstr "Qwen2.5-Omni 是一个端到端的多模态模型,旨在感知多种模态,包括文本、图像、音频和视频,同时以流式方式生成文本和自然语音响应。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:7 +msgid "" +"The `Qwen2.5-Omni` model was supported since `vllm-ascend:v0.11.0rc0`. " +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-NPU and multi-NPU deployment, accuracy and " +"performance evaluation." +msgstr "`Qwen2.5-Omni` 模型自 `vllm-ascend:v0.11.0rc0` 版本起获得支持。本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单NPU和多NPU部署、精度和性能评估。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:9 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:11 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的特性](../../user_guide/support_matrix/supported_models.md)以获取该模型支持的特性矩阵。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:13 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[特性指南](../../user_guide/feature_guide/index.md)以获取特性的配置方法。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:15 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:17 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:19 +msgid "" +"`Qwen2.5-Omni-3B`(BF16): [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-3B)" +msgstr "`Qwen2.5-Omni-3B`(BF16): [下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-3B)" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:20 +msgid "" +"`Qwen2.5-Omni-7B`(BF16): [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-7B)" +msgstr "`Qwen2.5-Omni-7B`(BF16): [下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-7B)" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:22 +msgid "Following examples use the 7B version by default." +msgstr "以下示例默认使用 7B 版本。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:24 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:26 +msgid "You can use our official docker image to run `Qwen2.5-Omni` directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 `Qwen2.5-Omni`。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:28 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." 
+msgstr "根据您的机器类型选择镜像并在节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:65 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:67 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:69 +msgid "Single NPU (Qwen2.5-Omni-7B)" +msgstr "单 NPU (Qwen2.5-Omni-7B)" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:72 +msgid "" +"The **environment variable** `LOCAL_MEDIA_PATH` which **allows** API " +"requests to read local images or videos from directories specified by the" +" server file system. Please note this is a security risk. Should only be " +"enabled in trusted environments." +msgstr "**环境变量** `LOCAL_MEDIA_PATH` **允许** API 请求从服务器文件系统指定的目录读取本地图像或视频。请注意,这存在安全风险。应仅在受信任的环境中启用。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:92 +msgid "" +"Now vllm-ascend docker image should contain vllm[audio] build part, if " +"you encounter *audio not supported issue* by any chance, please re-build " +"vllm with [audio] flag." +msgstr "当前 vllm-ascend docker 镜像应包含 vllm[audio] 构建部分,如果您遇到*音频不支持的问题*,请使用 [audio] 标志重新构建 vllm。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:100 +msgid "" +"`--allowed-local-media-path` is optional, only set it if you need infer " +"model with local media file." +msgstr "`--allowed-local-media-path` 是可选的,仅在需要使用本地媒体文件进行模型推理时设置。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:102 +msgid "" +"`--gpu-memory-utilization` should not be set manually unless you know " +"what this parameter does." +msgstr "`--gpu-memory-utilization` 不应手动设置,除非您了解此参数的作用。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:104 +msgid "Multiple NPU (Qwen2.5-Omni-7B)" +msgstr "多 NPU (Qwen2.5-Omni-7B)" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:123 +msgid "" +"`--tensor_parallel_size` no need to set for this 7B model, but if you " +"really need tensor parallel, tp size can be one of `1/2/4`." +msgstr "对于此 7B 模型,无需设置 `--tensor_parallel_size`,但如果确实需要张量并行,tp 大小可以是 `1/2/4` 之一。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:125 +msgid "Prefill-Decode Disaggregation" +msgstr "预填充-解码分离" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:127 +msgid "Not supported yet." +msgstr "暂不支持。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:129 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:131 +msgid "If your service **starts** successfully, you can see the info shown below:" +msgstr "如果您的服务**启动**成功,您可以看到如下所示的信息:" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:139 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "一旦您的服务器启动,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:167 +msgid "" +"If you query the server successfully, you can see the info shown below " +"(client):" +msgstr "如果您成功查询服务器,您可以看到如下所示的信息(客户端):" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:173 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:175 +msgid "Qwen2.5-Omni on vllm-ascend has been tested on AISBench." +msgstr "vllm-ascend 上的 Qwen2.5-Omni 已在 AISBench 上进行了测试。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:177 +#: ../../source/tutorials/models/Qwen2.5-Omni.md:190 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:179 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." 
+msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:181 +msgid "" +"After execution, you can get the result, here is the result of `Qwen2.5" +"-Omni-7B` with `vllm-ascend:0.11.0rc0` for reference only." +msgstr "执行后,您可以获得结果,以下是 `Qwen2.5-Omni-7B` 在 `vllm-ascend:0.11.0rc0` 上的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "platform" +msgstr "平台" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "vllm-api-stream-chat" +msgstr "vllm-api-stream-chat" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "textVQA" +msgstr "textVQA" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "A2" +msgstr "A2" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "accuracy" +msgstr "精度" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "gen_base64" +msgstr "gen_base64" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "83.47" +msgstr "83.47" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "A3" +msgstr "A3" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:91 +msgid "84.04" +msgstr "84.04" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:188 +msgid "Performance Evaluation" +msgstr "性能评估" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:192 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参考[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:194 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:196 +msgid "Run performance evaluation of `Qwen2.5-Omni-7B` as an example." +msgstr "以运行 `Qwen2.5-Omni-7B` 的性能评估为例。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:198 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "更多详情请参考 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:200 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:202 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:203 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:204 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:206 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen2.5-Omni.md:212 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." 
+msgstr "大约几分钟后,您就可以获得性能评估结果。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-235B-A22B.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-235B-A22B.po new file mode 100644 index 00000000..a4db013d --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-235B-A22B.po @@ -0,0 +1,665 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:1 +msgid "Qwen3-235B-A22B" +msgstr "Qwen3-235B-A22B" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:5 +msgid "" +"Qwen3 is the latest generation of large language models in Qwen series, " +"offering a comprehensive suite of dense and mixture-of-experts (MoE) " +"models. Built upon extensive training, Qwen3 delivers groundbreaking " +"advancements in reasoning, instruction-following, agent capabilities, and" +" multilingual support." +msgstr "" +"Qwen3 是 Qwen 系列最新一代的大语言模型,提供了一套完整的稠密模型和专家混合模型。基于广泛的训练,Qwen3 在推理、指令遵循、智能体能力和多语言支持方面实现了突破性进展。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node and multi-node deployment, accuracy and " +"performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单节点与多节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:9 +msgid "The `Qwen3-235B-A22B` model is first supported in `vllm-ascend:v0.8.4rc2`." +msgstr "`Qwen3-235B-A22B` 模型首次在 `vllm-ascend:v0.8.4rc2` 版本中得到支持。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:11 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的特性](../../user_guide/support_matrix/supported_models.md)以获取该模型的支持特性矩阵。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[特性指南](../../user_guide/feature_guide/index.md)以获取特性的配置方法。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:21 +msgid "" +"`Qwen3-235B-A22B`(BF16 version): require 1 Atlas 800 A3 (64G × 16) node, " +"1 Atlas 800 A2 (64G × 8) node or 2 Atlas 800 A2(32G × 8)nodes. 
[Download " +"model weight](https://www.modelscope.cn/models/Qwen/Qwen3-235B-A22B)" +msgstr "" +"`Qwen3-235B-A22B`(BF16 版本):需要 1 个 Atlas 800 A3 (64G × 16) 节点、1 个 Atlas 800 A2 (64G × 8) 节点或 2 个 Atlas 800 A2(32G × 8) 节点。[下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-235B-A22B)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:22 +msgid "" +"`Qwen3-235B-A22B-w8a8`(Quantized version): require 1 Atlas 800 A3 (64G × " +"16) node or 1 Atlas 800 A2 (64G × 8) node or 2 Atlas 800 A2(32G × " +"8)nodes. [Download model weight](https://modelscope.cn/models/vllm-" +"ascend/Qwen3-235B-A22B-W8A8)" +msgstr "" +"`Qwen3-235B-A22B-w8a8`(量化版本):需要 1 个 Atlas 800 A3 (64G × 16) 节点、1 个 Atlas 800 A2 (64G × 8) 节点或 2 个 Atlas 800 A2(32G × 8) 节点。[下载模型权重](https://modelscope.cn/models/vllm-ascend/Qwen3-235B-A22B-W8A8)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:24 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`." +msgstr "建议将模型权重下载到多节点的共享目录中,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:26 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:28 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:30 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md +msgid "Use docker image" +msgstr "使用 Docker 镜像" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:36 +msgid "" +"For example, using images `quay.io/ascend/vllm-ascend:v0.11.0rc2`(for " +"Atlas 800 A2) and `quay.io/ascend/vllm-ascend:v0.11.0rc2-a3`(for Atlas " +"800 A3)." +msgstr "例如,使用镜像 `quay.io/ascend/vllm-ascend:v0.11.0rc2`(适用于 Atlas 800 A2)和 `quay.io/ascend/vllm-ascend:v0.11.0rc2-a3`(适用于 Atlas 800 A3)。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:38 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像并在节点上启动 Docker 容器,请参考[使用 Docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md +msgid "Build from source" +msgstr "从源码构建" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:78 +msgid "You can build all from source." +msgstr "您可以从源码构建所有组件。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:80 +msgid "" +"Install `vllm-ascend`, refer to [set up using " +"python](../../installation.md#set-up-using-python)." +msgstr "安装 `vllm-ascend`,请参考[使用 Python 设置](../../installation.md#set-up-using-python)。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:84 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." +msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:86 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:88 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:90 +msgid "" +"`Qwen3-235B-A22B` and `Qwen3-235B-A22B-w8a8` can both be deployed on 1 " +"Atlas 800 A3(64G*16), 1 Atlas 800 A2(64G*8). 
Quantized version need to " +"start with parameter `--quantization ascend`." +msgstr "`Qwen3-235B-A22B` 和 `Qwen3-235B-A22B-w8a8` 都可以部署在 1 个 Atlas 800 A3(64G*16) 或 1 个 Atlas 800 A2(64G*8) 上。量化版本需要使用参数 `--quantization ascend` 启动。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:93 +msgid "Run the following script to execute online 128k inference." +msgstr "运行以下脚本来执行在线 128k 推理。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:126 +msgid "**Notice:**" +msgstr "**注意:**" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:128 +msgid "" +"[Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B#processing-" +"long-texts) originally only supports 40960 " +"context(max_position_embeddings). If you want to use it and its related " +"quantization weights to run long seqs (such as 128k context), it is " +"required to use yarn rope-scaling technique." +msgstr "" +"[Qwen3-235B-A22B](https://huggingface.co/Qwen/Qwen3-235B-A22B#processing-long-texts) 原本仅支持 40960 上下文长度(max_position_embeddings)。如果您想使用它及其相关的量化权重来运行长序列(例如 128k 上下文),需要使用 yarn rope-scaling 技术。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:129 +#, python-brace-format +msgid "" +"For vLLM version same as or new than `v0.12.0`, use parameter: `--hf-" +"overrides '{\"rope_parameters\": " +"{\"rope_type\":\"yarn\",\"rope_theta\":1000000,\"factor\":4,\"original_max_position_embeddings\":32768}}'" +" \\`." +msgstr "" +"对于 `v0.12.0` 及以上版本的 vLLM,使用参数:`--hf-overrides '{\"rope_parameters\": " +"{\"rope_type\":\"yarn\",\"rope_theta\":1000000,\"factor\":4,\"original_max_position_embeddings\":32768}}' \\`。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:130 +#, python-brace-format +msgid "" +"For vllm version below `v0.12.0`, use parameter: `--rope_scaling " +"'{\"rope_type\":\"yarn\",\"factor\":4,\"original_max_position_embeddings\":32768}'" +" \\`. If you are using weights like [Qwen3-235B-A22B-" +"Instruct-2507](https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507)" +" which originally supports long contexts, there is no need to add this " +"parameter." +msgstr "" +"对于 `v0.12.0` 以下版本的 vLLM,使用参数:`--rope_scaling " +"'{\"rope_type\":\"yarn\",\"factor\":4,\"original_max_position_embeddings\":32768}' \\`。如果您使用的是像 [Qwen3-235B-A22B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507) 这样原本就支持长上下文的权重,则无需添加此参数。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:133 +msgid "The parameters are explained as follows:" +msgstr "参数解释如下:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:135 +msgid "" +"`--data-parallel-size` 1 and `--tensor-parallel-size` 8 are common " +"settings for data parallelism (DP) and tensor parallelism (TP) sizes." +msgstr "`--data-parallel-size` 1 和 `--tensor-parallel-size` 8 是数据并行(DP)和张量并行(TP)大小的常见设置。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:136 +msgid "" +"`--max-model-len` represents the context length, which is the maximum " +"value of the input plus output for a single request." +msgstr "`--max-model-len` 表示上下文长度,即单个请求的输入加输出的最大值。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:137 +msgid "" +"`--max-num-seqs` indicates the maximum number of requests that each DP " +"group is allowed to process. If the number of requests sent to the " +"service exceeds this limit, the excess requests will remain in a waiting " +"state and will not be scheduled. Note that the time spent in the waiting " +"state is also counted in metrics such as TTFT and TPOT. 
Therefore, when " +"testing performance, it is generally recommended that `--max-num-seqs` * " +"`--data-parallel-size` >= the actual total concurrency." +msgstr "" +"`--max-num-seqs` 表示每个 DP 组允许处理的最大请求数。如果发送到服务的请求数超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:138 +msgid "" +"`--max-num-batched-tokens` represents the maximum number of tokens that " +"the model can process in a single step. Currently, vLLM v1 scheduling " +"enables ChunkPrefill/SplitFuse by default, which means:" +msgstr "`--max-num-batched-tokens` 表示模型在单步中可以处理的最大 token 数。目前,vLLM v1 调度默认启用 ChunkPrefill/SplitFuse,这意味着:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:139 +msgid "" +"(1) If the input length of a request is greater than `--max-num-batched-" +"tokens`, it will be divided into multiple rounds of computation according" +" to `--max-num-batched-tokens`;" +msgstr "(1) 如果一个请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:140 +msgid "" +"(2) Decode requests are prioritized for scheduling, and prefill requests " +"are scheduled only if there is available capacity." +msgstr "(2) 解码请求优先被调度,只有在有可用容量时才会调度预填充请求。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:141 +msgid "" +"Generally, if `--max-num-batched-tokens` is set to a larger value, the " +"overall latency will be lower, but the pressure on GPU memory (activation" +" value usage) will be greater." +msgstr "通常,如果将 `--max-num-batched-tokens` 设置为较大的值,整体延迟会更低,但 GPU 内存(激活值使用)的压力会更大。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:142 +msgid "" +"`--gpu-memory-utilization` represents the proportion of HBM that vLLM " +"will use for actual inference. Its essential function is to calculate the" +" available kv_cache size. During the warm-up phase (referred to as " +"profile run in vLLM), vLLM records the peak GPU memory usage during an " +"inference process with an input size of `--max-num-batched-tokens`. The " +"available kv_cache size is then calculated as: `--gpu-memory-utilization`" +" * HBM size - peak GPU memory usage. Therefore, the larger the value of " +"`--gpu-memory-utilization`, the more kv_cache can be used. However, since" +" the GPU memory usage during the warm-up phase may differ from that " +"during actual inference (e.g., due to uneven EP load), setting `--gpu-" +"memory-utilization` too high may lead to OOM (Out of Memory) issues " +"during actual inference. The default value is `0.9`." +msgstr "" +"`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache 大小。在预热阶段(在 vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可以使用的 kv_cache 就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理期间不同(例如,由于 EP 负载不均),将 `--gpu-memory-utilization` 设置得过高可能会导致实际推理期间出现 OOM(内存不足)问题。默认值为 `0.9`。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:143 +msgid "" +"`--enable-expert-parallel` indicates that EP is enabled. Note that vLLM " +"does not support a mixed approach of ETP and EP; that is, MoE can either " +"use pure EP or pure TP." +msgstr "`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE 可以使用纯 EP 或纯 TP。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:144 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. 
" +"To enable it, remove this option." +msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,请移除此选项。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:145 +msgid "" +"`--quantization` \"ascend\" indicates that quantization is used. To " +"disable quantization, remove this option." +msgstr "`--quantization` \"ascend\" 表示使用了量化。要禁用量化,请移除此选项。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:146 +msgid "" +"`--compilation-config` contains configurations related to the aclgraph " +"graph mode. The most significant configurations are \"cudagraph_mode\" " +"and \"cudagraph_capture_sizes\", which have the following meanings: " +"\"cudagraph_mode\": represents the specific graph mode. Currently, " +"\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " +"mainly used to reduce the cost of operator dispatch. Currently, " +"\"FULL_DECODE_ONLY\" is recommended." +msgstr "" +"`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:148 +msgid "" +"\"cudagraph_capture_sizes\": represents different levels of graph modes. " +"The default value is [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]. " +"In the graph mode, the input for graphs at different levels is fixed, and" +" inputs between levels are automatically padded to the next level. " +"Currently, the default setting is recommended. Only in some scenarios is " +"it necessary to set this separately to achieve optimal performance." +msgstr "" +"\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一个级别。目前推荐使用默认设置。只有在某些场景下,才需要单独设置此参数以达到最佳性能。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:149 +msgid "" +"`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` indicates that Flashcomm1 " +"optimization is enabled. Currently, this optimization is only supported " +"for MoE in scenarios where tp_size > 1." +msgstr "`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` 表示启用了 Flashcomm1 优化。目前,此优化仅在 tp_size > 1 的场景下对 MoE 支持。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:151 +msgid "Multi-node Deployment with MP (Recommended)" +msgstr "使用 MP 进行多节点部署(推荐)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:153 +msgid "" +"Assume you have Atlas 800 A3 (64G*16) nodes (or 2* A2), and want to " +"deploy the `Qwen3-VL-235B-A22B-Instruct` model across multiple nodes." +msgstr "假设您有 Atlas 800 A3 (64G*16) 节点(或 2* A2),并希望跨多个节点部署 `Qwen3-VL-235B-A22B-Instruct` 模型。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:155 +msgid "Node 0" +msgstr "节点 0" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:197 +msgid "Node1" +msgstr "节点 1" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:243 +msgid "" +"If the service starts successfully, the following information will be " +"displayed on node 0:" +msgstr "如果服务启动成功,节点 0 上将显示以下信息:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:254 +msgid "Multi-node Deployment with Ray" +msgstr "使用 Ray 进行多节点部署" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:256 +msgid "refer to [Ray Distributed (Qwen/Qwen3-235B-A22B)](../features/ray.md)." 
+msgstr "请参考 [Ray 分布式 (Qwen/Qwen3-235B-A22B)](../features/ray.md)。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:258 +msgid "Prefill-Decode Disaggregation" +msgstr "预填充-解码分离" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:260 +msgid "" +"refer to [Prefill-Decode Disaggregation Mooncake Verification " +"(Qwen)](../features/pd_disaggregation_mooncake_multi_node.md)" +msgstr "请参阅 [Prefill-Decode 分离部署 Mooncake 验证 (Qwen)](../features/pd_disaggregation_mooncake_multi_node.md)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:262 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:264 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:277 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:279 +msgid "Here are two accuracy evaluation methods." +msgstr "以下是两种精度评估方法。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:281 +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:293 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:283 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参阅 [使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:285 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3" +"-235B-A22B-w8a8` in `vllm-ascend:0.11.0rc0` for reference only." +msgstr "执行后,您将获得结果。以下是 `vllm-ascend:0.11.0rc0` 中 `Qwen3-235B-A22B-w8a8` 的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "cevaldataset" +msgstr "cevaldataset" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "91.16" +msgstr "91.16" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:291 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:295 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参阅 [使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:297 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:299 +msgid "Run performance evaluation of `Qwen3-235B-A22B-w8a8` as an example." +msgstr "以运行 `Qwen3-235B-A22B-w8a8` 的性能评估为例。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:301 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." 
+msgstr "更多详情请参阅 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:303 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 包含三个子命令:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:305 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:306 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:307 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:309 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:316 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您将获得性能评估结果。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:318 +msgid "Reproducing Performance Results" +msgstr "复现性能结果" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:320 +msgid "" +"In this section, we provide simple scripts to re-produce our latest " +"performance. It is also recommended to read instructions above to " +"understand basic concepts or options in vLLM && vLLM-Ascend." +msgstr "本节提供简单的脚本来复现我们最新的性能结果。也建议阅读上方的说明,以了解 vLLM 和 vLLM-Ascend 中的基本概念或选项。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:322 +msgid "Environment" +msgstr "环境" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:324 +msgid "vLLM v0.13.0" +msgstr "vLLM v0.13.0" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:325 +msgid "vLLM-Ascend v0.13.0rc1" +msgstr "vLLM-Ascend v0.13.0rc1" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:326 +msgid "CANN 8.3.RC2" +msgstr "CANN 8.3.RC2" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:327 +msgid "torch_npu 2.8.0" +msgstr "torch_npu 2.8.0" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:328 +msgid "HDK/driver 25.3.RC1" +msgstr "HDK/驱动 25.3.RC1" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:329 +msgid "triton_ascend 3.2.0" +msgstr "triton_ascend 3.2.0" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:331 +msgid "Single Node A3 (64G*16)" +msgstr "单节点 A3 (64G*16)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:333 +msgid "Example server scripts:" +msgstr "服务器脚本示例:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:368 +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:597 +msgid "Benchmark scripts:" +msgstr "基准测试脚本:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:384 +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:613 +msgid "Reference test results:" +msgstr "参考测试结果:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "num_requests" +msgstr "请求数量" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "concurrency" +msgstr "并发数" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "mean TTFT(ms)" +msgstr "平均 TTFT(毫秒)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "mean TPOT(ms)" +msgstr "平均 TPOT(毫秒)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "output token throughput (tok/s)" +msgstr "输出令牌吞吐量 (令牌/秒)" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "720" +msgstr "720" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "144" +msgstr "144" + +#: 
../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "4717.45" +msgstr "4717.45" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "48.69" +msgstr "48.69" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "2761.72" +msgstr "2761.72" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:390 +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:619 +msgid "Note:" +msgstr "注意:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:392 +msgid "" +"Setting `export VLLM_ASCEND_ENABLE_FUSED_MC2=1` enables MoE fused " +"operators that reduce time consumption of MoE in both prefill and decode." +" This is an experimental feature which only supports W8A8 quantization on" +" Atlas A3 servers now. If you encounter any problems when using this " +"feature, you can disable it by setting `export " +"VLLM_ASCEND_ENABLE_FUSED_MC2=0` and update issues in vLLM-Ascend " +"community." +msgstr "设置 `export VLLM_ASCEND_ENABLE_FUSED_MC2=1` 可启用 MoE 融合算子,以减少预填充和解码阶段 MoE 的时间消耗。这是一个实验性功能,目前仅支持 Atlas A3 服务器上的 W8A8 量化。如果您在使用此功能时遇到任何问题,可以通过设置 `export VLLM_ASCEND_ENABLE_FUSED_MC2=0` 来禁用它,并在 vLLM-Ascend 社区更新问题。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:393 +msgid "" +"Here we disable prefix cache because of random datasets. You can enable " +"prefix cache if requests have long common prefix." +msgstr "由于使用随机数据集,此处我们禁用了前缀缓存。如果请求具有较长的公共前缀,您可以启用前缀缓存。" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:395 +msgid "Three Node A3 -- PD disaggregation" +msgstr "三节点 A3 -- PD 分离部署" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:397 +msgid "" +"On three Atlas 800 A3(64G*16) server, we recommend to use one node as one" +" prefill instance and two nodes as one decode instance. Example server " +"scripts: Prefill Node 1" +msgstr "在三台 Atlas 800 A3(64G*16) 服务器上,我们建议使用一个节点作为一个预填充实例,两个节点作为一个解码实例。服务器脚本示例:预填充节点 1" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:462 +msgid "Decode Node 1" +msgstr "解码节点 1" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:526 +msgid "Decode Node 2" +msgstr "解码节点 2" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:591 +msgid "PD proxy:" +msgstr "PD 代理:" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "2880" +msgstr "2880" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "576" +msgstr "576" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "3735.98" +msgstr "3735.98" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "52.07" +msgstr "52.07" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:76 +msgid "8593.44" +msgstr "8593.44" + +#: ../../source/tutorials/models/Qwen3-235B-A22B.md:621 +msgid "" +"We recommend to set `export VLLM_ASCEND_ENABLE_FUSED_MC2=2` on this " +"scenario (typically EP32 for Qwen3-235B). This enables a different MoE " +"fusion operator." +msgstr "在此场景下(通常 Qwen3-235B 使用 EP32),我们建议设置 `export VLLM_ASCEND_ENABLE_FUSED_MC2=2`。这将启用一个不同的 MoE 融合算子。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-30B-A3B.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-30B-A3B.po new file mode 100644 index 00000000..f97f174e --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-30B-A3B.po @@ -0,0 +1,67 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:1 +msgid "Qwen3-30B-A3B" +msgstr "Qwen3-30B-A3B" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:3 +msgid "Run vllm-ascend on Multi-NPU with Qwen3 MoE" +msgstr "在 Multi-NPU 上使用 Qwen3 MoE 运行 vllm-ascend" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:5 +msgid "Run docker container:" +msgstr "运行 docker 容器:" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:34 +msgid "Set up environment variables:" +msgstr "设置环境变量:" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:44 +msgid "Online Inference on Multi-NPU" +msgstr "在 Multi-NPU 上进行在线推理" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:46 +msgid "Run the following script to start the vLLM server on Multi-NPU:" +msgstr "运行以下脚本以在 Multi-NPU 上启动 vLLM 服务器:" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:48 +msgid "" +"For an Atlas A2 with 64 GB of NPU card memory, tensor-parallel-size " +"should be at least 2, and for 32 GB of memory, tensor-parallel-size " +"should be at least 4." +msgstr "对于具有 64 GB NPU 卡内存的 Atlas A2,tensor-parallel-size 应至少为 2;对于 32 GB 内存,tensor-parallel-size 应至少为 4。" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:54 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "服务器启动后,您可以使用输入提示词查询模型。" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:69 +msgid "Offline Inference on Multi-NPU" +msgstr "在 Multi-NPU 上进行离线推理" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:71 +msgid "Run the following script to execute offline inference on multi-NPU:" +msgstr "运行以下脚本以在 multi-NPU 上执行离线推理:" + +#: ../../source/tutorials/models/Qwen3-30B-A3B.md:108 +msgid "If you run this script successfully, you can see the info shown below:" +msgstr "如果成功运行此脚本,您将看到如下所示的信息:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-32B-W4A4.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-32B-W4A4.po new file mode 100644 index 00000000..d8ce0a09 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-32B-W4A4.po @@ -0,0 +1,88 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:1 +msgid "Qwen3-32B-W4A4" +msgstr "Qwen3-32B-W4A4" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:5 +msgid "" +"W4A4 Flat Quantization is for better model compression and inference " +"efficiency on Ascend devices. 
And W4A4 is supported since `v0.11.0rc1`. " +"For modelslim, W4A4 is supported since `tag_MindStudio_8.2.RC1.B120_002`." +msgstr "" +"W4A4 扁平量化旨在提升模型在昇腾设备上的压缩率和推理效率。W4A4 自 `v0.11.0rc1` 版本起获得支持。对于 modelslim,W4A4 自 `tag_MindStudio_8.2.RC1.B120_002` 版本起获得支持。" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:8 +msgid "The following steps will show how to quantize Qwen3 32B to W4A4." +msgstr "以下步骤将展示如何将 Qwen3 32B 量化为 W4A4。" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:10 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:12 +msgid "Run Docker Container" +msgstr "运行 Docker 容器" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:35 +msgid "Install modelslim and Convert Model" +msgstr "安装 modelslim 并转换模型" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:38 +msgid "" +"You can choose to convert the model yourself or use the quantized model " +"we uploaded, see " +msgstr "" +"您可以选择自行转换模型,或使用我们已上传的量化模型,详见 " + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:68 +msgid "Verify the Quantized Model" +msgstr "验证量化模型" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:70 +msgid "The converted model files look like:" +msgstr "转换后的模型文件结构如下:" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:95 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:97 +msgid "Online Serving on Single NPU" +msgstr "单 NPU 在线服务" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:103 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "服务器启动后,您可以使用输入提示词查询模型。" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:118 +msgid "Offline Inference on Single NPU" +msgstr "单 NPU 离线推理" + +#: ../../source/tutorials/models/Qwen3-32B-W4A4.md:121 +msgid "To enable quantization for ascend, quantization method must be \"ascend\"." +msgstr "要为昇腾启用量化,量化方法必须设置为 \"ascend\"。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-8B-W4A8.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-8B-W4A8.po new file mode 100644 index 00000000..75649e58 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-8B-W4A8.po @@ -0,0 +1,72 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:1 +msgid "Qwen3-8B-W4A8" +msgstr "Qwen3-8B-W4A8" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:3 +msgid "Run Docker Container" +msgstr "运行 Docker 容器" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:6 +msgid "w4a8 quantization feature is supported by v0.9.1rc2 and later." 
+msgstr "w4a8 量化特性由 v0.9.1rc2 及更高版本支持。" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:30 +msgid "Install modelslim and Convert Model" +msgstr "安装 modelslim 并转换模型" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:33 +msgid "" +"You can choose to convert the model yourself or use the quantized model " +"we uploaded, see " +msgstr "" +"您可以选择自行转换模型,或使用我们已上传的量化模型,请参阅 " + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:73 +msgid "Verify the Quantized Model" +msgstr "验证量化模型" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:75 +msgid "The converted model files look like:" +msgstr "转换后的模型文件结构如下:" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:93 +msgid "" +"Run the following script to start the vLLM server with the quantized " +"model:" +msgstr "运行以下脚本来启动使用量化模型的 vLLM 服务器:" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:101 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "服务器启动后,您可以通过输入提示词来查询模型。" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:116 +msgid "" +"Run the following script to execute offline inference on single-NPU with " +"the quantized model:" +msgstr "运行以下脚本,使用量化模型在单 NPU 上进行离线推理:" + +#: ../../source/tutorials/models/Qwen3-8B-W4A8.md:119 +msgid "To enable quantization for ascend, quantization method must be \"ascend\"." +msgstr "要为 Ascend 启用量化,量化方法必须设置为 \"ascend\"。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Coder-30B-A3B.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Coder-30B-A3B.po new file mode 100644 index 00000000..becd60d6 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Coder-30B-A3B.po @@ -0,0 +1,210 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:1 +msgid "Qwen3-Coder-30B-A3B" +msgstr "Qwen3-Coder-30B-A3B" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:5 +msgid "" +"The newly released Qwen3-Coder-30B-A3B employs a sparse MoE architecture " +"for efficient training and inference, delivering significant " +"optimizations in agentic coding, extended context support of up to 1M " +"tokens, and versatile function calling." +msgstr "新发布的 Qwen3-Coder-30B-A3B 采用稀疏 MoE 架构,以实现高效的训练和推理,在智能体编码、高达 1M token 的扩展上下文支持以及多功能调用方面带来了显著优化。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node deployment, accuracy and performance evaluation." 
+msgstr "本文档将展示该模型的主要验证步骤,包括支持的功能、功能配置、环境准备、单节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:9 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:11 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:13 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:15 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:17 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:19 +msgid "" +"`Qwen3-Coder-30B-A3B-Instruct`(BF16 version): requires 1 Atlas 800 A3 " +"node (with 16x 64G NPUs) or 1 Atlas 800 A2 node (with 8x 64G/32G NPUs). " +"[Download model weight](https://modelscope.cn/models/Qwen/Qwen3-Coder-" +"30B-A3B-Instruct)" +msgstr "`Qwen3-Coder-30B-A3B-Instruct`(BF16 版本):需要 1 个 Atlas 800 A3 节点(配备 16 个 64G NPU)或 1 个 Atlas 800 A2 节点(配备 8 个 64G/32G NPU)。[下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-Coder-30B-A3B-Instruct)" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:21 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:23 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:25 +msgid "" +"`Qwen3-Coder` is first supported in `vllm-ascend:v0.10.0rc1`, please run " +"this model using a later version." +msgstr "`Qwen3-Coder` 首次在 `vllm-ascend:v0.10.0rc1` 中得到支持,请使用此版本或更高版本来运行此模型。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:27 +msgid "" +"You can use our official docker image to run `Qwen3-Coder-30B-A3B-" +"Instruct` directly." +msgstr "您可以使用我们的官方 docker 镜像直接运行 `Qwen3-Coder-30B-A3B-Instruct`。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:53 +msgid "" +"In addition, if you don't want to use the docker image as above, you can " +"also build all from source:" +msgstr "此外,如果您不想使用上述 docker 镜像,也可以从源代码构建所有内容:" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:55 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源代码安装 `vllm-ascend`,请参考[安装指南](../../installation.md)。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:57 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:59 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:61 +msgid "Run the following script to execute online inference." +msgstr "运行以下脚本来执行在线推理。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:63 +msgid "" +"For an Atlas A2 with 64 GB of NPU card memory, tensor-parallel-size " +"should be at least 2, and for 32 GB of memory, tensor-parallel-size " +"should be at least 4." 
+msgstr "对于配备 64 GB NPU 显存的 Atlas A2,张量并行大小应至少为 2;对于 32 GB 显存,张量并行大小应至少为 4。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:72 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:74 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:89 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:91 +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:103 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:93 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:95 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3" +"-Coder-30B-A3B-Instruct` in `vllm-ascend:0.11.0rc0` for reference only." +msgstr "执行后,您可以获得结果。以下是 `Qwen3-Coder-30B-A3B-Instruct` 在 `vllm-ascend:0.11.0rc0` 中的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "openai_humaneval" +msgstr "openai_humaneval" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "f4a973" +msgstr "f4a973" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "humaneval_pass@1" +msgstr "humaneval_pass@1" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "gen" +msgstr "gen" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:29 +msgid "94.51" +msgstr "94.51" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:101 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3-Coder-30B-A3B.md:105 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参考[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po new file mode 100644 index 00000000..1d2f2c88 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po @@ -0,0 +1,866 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-Dense.md:1 +msgid "Qwen3-Dense(Qwen3-0.6B/8B/32B)" +msgstr "Qwen3-Dense(Qwen3-0.6B/8B/32B)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-Dense.md:5 +msgid "" +"Qwen3 is the latest generation of large language models in Qwen series, " +"offering a comprehensive suite of dense and mixture-of-experts (MoE) " +"models. Built upon extensive training, Qwen3 delivers groundbreaking " +"advancements in reasoning, instruction-following, agent capabilities, and" +" multilingual support." +msgstr "" +"Qwen3 是 Qwen 系列最新一代的大语言模型,提供了一套完整的稠密模型和专家混合" +"(MoE) 模型。基于广泛的训练,Qwen3 在推理、指令遵循、智能体能力和多语言支持方" +"面实现了突破性进展。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:7 +msgid "" +"Welcome to the tutorial on optimizing Qwen Dense models in the vLLM-" +"Ascend environment. This guide will help you configure the most effective" +" settings for your use case, with practical examples that highlight key " +"optimization points. We will also explore how adjusting service " +"parameters can maximize throughput performance across various scenarios." +msgstr "" +"欢迎阅读在 vLLM-Ascend 环境中优化 Qwen 稠密模型的教程。本指南将帮助您为您的用" +"例配置最有效的设置,并通过实际示例突出关键优化点。我们还将探讨如何调整服务参" +"数以在各种场景下最大化吞吐性能。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:9 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, accuracy and performance evaluation." +msgstr "" +"本文档将展示模型的主要验证步骤,包括支持的特性、特性配置、环境准备、精度和性" +"能评估。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:11 +msgid "" +"The Qwen3 Dense models are first supported in " +"[v0.8.4rc2](https://github.com/vllm-project/vllm-" +"ascend/blob/main/docs/source/user_guide/release_notes.md#v084rc2---" +"20250429). This example requires version **v0.11.0rc2**. Earlier versions" +" may lack certain features." +msgstr "" +"Qwen3 稠密模型首次在 " +"[v0.8.4rc2](https://github.com/vllm-project/vllm-" +"ascend/blob/main/docs/source/user_guide/release_notes.md#v084rc2---" +"20250429) 中得到支持。本示例需要版本 **v0.11.0rc2**。更早的版本可能缺少某些特" +"性。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:13 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/Qwen3-Dense.md:15 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "" +"请参考 [支持的特性](../../user_guide/support_matrix/supported_models." +"md) 以获取模型支持的特性矩阵。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:17 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." 
+msgstr "" +"请参考 [特性指南](../../user_guide/feature_guide/index.md) 以获取特性的配置信" +"息。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:19 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-Dense.md:21 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3-Dense.md:23 +msgid "" +"`Qwen3-0.6B`(BF16 version): require 1 Atlas 800 A3 (64G × 2) card or 1 " +"Atlas 800I A2 (64G × 1) card. [Download model " +"weight](https://modelers.cn/models/Modelers_Park/Qwen3-0.6B)" +msgstr "" +"`Qwen3-0.6B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas " +"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" +"Modelers_Park/Qwen3-0.6B)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:24 +msgid "" +"`Qwen3-1.7B`(BF16 version): require 1 Atlas 800 A3 (64G × 2) card or 1 " +"Atlas 800I A2 (64G × 1) card. [Download model " +"weight](https://modelers.cn/models/Modelers_Park/Qwen3-1.7B)" +msgstr "" +"`Qwen3-1.7B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas " +"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" +"Modelers_Park/Qwen3-1.7B)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:25 +msgid "" +"`Qwen3-4B`(BF16 version): require 1 Atlas 800 A3 (64G × 2) card or 1 " +"Atlas 800I A2 (64G × 1) card. [Download model " +"weight](https://modelers.cn/models/Modelers_Park/Qwen3-4B)" +msgstr "" +"`Qwen3-4B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas " +"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" +"Modelers_Park/Qwen3-4B)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:26 +msgid "" +"`Qwen3-8B`(BF16 version): require 1 Atlas 800 A3 (64G × 2) card or 1 " +"Atlas 800I A2 (64G × 1) card. [Download model " +"weight](https://modelers.cn/models/Modelers_Park/Qwen3-8B)" +msgstr "" +"`Qwen3-8B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas " +"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" +"Modelers_Park/Qwen3-8B)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:27 +msgid "" +"`Qwen3-14B`(BF16 version): require 1 Atlas 800 A3 (64G × 2) card or 2 " +"Atlas 800I A2 (64G × 1) cards. [Download model " +"weight](https://modelers.cn/models/Modelers_Park/Qwen3-14B)" +msgstr "" +"`Qwen3-14B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 2 张 Atlas " +"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" +"Modelers_Park/Qwen3-14B)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:28 +msgid "" +"`Qwen3-32B`(BF16 version): require 2 Atlas 800 A3 (64G × 4) cards or 4 " +"Atlas 800I A2 (64G × 4) cards. [Download model " +"weight](https://modelers.cn/models/Modelers_Park/Qwen3-32B)" +msgstr "" +"`Qwen3-32B`(BF16 版本): 需要 2 张 Atlas 800 A3 (64G × 4) 卡或 4 张 Atlas " +"800I A2 (64G × 4) 卡。[下载模型权重](https://modelers.cn/models/" +"Modelers_Park/Qwen3-32B)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:29 +msgid "" +"`Qwen3-32B-W8A8`(Quantized version): require 2 Atlas 800 A3 (64G × 4) " +"cards or 4 Atlas 800I A2 (64G × 4) cards. [Download model " +"weight](https://www.modelscope.cn/models/vllm-ascend/Qwen3-32B-W8A8)" +msgstr "" +"`Qwen3-32B-W8A8`(量化版本): 需要 2 张 Atlas 800 A3 (64G × 4) 卡或 4 张 " +"Atlas 800I A2 (64G × 4) 卡。[下载模型权重](https://www.modelscope.cn/" +"models/vllm-ascend/Qwen3-32B-W8A8)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:31 +msgid "" +"These are the recommended numbers of cards, which can be adjusted " +"according to the actual situation." 
+msgstr "这些是推荐的卡数,可以根据实际情况进行调整。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:33 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多节点的共享目录,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/Qwen3-Dense.md:35 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:37 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "" +"如果您想部署多节点环境,需要根据 [验证多节点通信环境](../../installation." +"md#verify-multi-node-communication) 来验证多节点通信。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:39 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3-Dense.md:41 +msgid "" +"You can use our official docker image for supporting Qwen3 Dense models. " +"Currently, we provide the all-in-one images.[Download " +"images](https://quay.io/repository/ascend/vllm-ascend?tab=tags)" +msgstr "" +"您可以使用我们的官方 docker 镜像来支持 Qwen3 稠密模型。目前,我们提供一体化镜" +"像。[下载镜像](https://quay.io/repository/ascend/vllm-ascend?tab=tags)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:44 +msgid "Docker Pull (by tag)" +msgstr "Docker 拉取(通过标签)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:53 +msgid "Docker run" +msgstr "Docker 运行" + +#: ../../source/tutorials/models/Qwen3-Dense.md:90 +msgid "" +"The default workdir is `/workspace`, vLLM and vLLM Ascend code are placed" +" in `/vllm-workspace` and installed in [development " +"mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html)" +" (`pip install -e`) to help developer immediately take place changes " +"without requiring a new installation." +msgstr "" +"默认工作目录是 `/workspace`,vLLM 和 vLLM Ascend 代码放置在 `/vllm-" +"workspace` 中,并以 [开发模式](https://setuptools.pypa.io/en/latest/" +"userguide/development_mode.html) (`pip install -e`) 安装,以帮助开发者立即应用" +"更改而无需重新安装。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:92 +msgid "" +"In the [Run docker container](./Qwen3-Dense.md#run-docker-container), " +"detailed explanations are provided through specific examples." +msgstr "" +"在 [运行 docker 容器](./Qwen3-Dense.md#run-docker-container) 中,通过具体示例" +"提供了详细说明。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:94 +msgid "" +"In addition, if you don't want to use the docker image as above, you can " +"also build all from source:" +msgstr "此外,如果您不想使用上述 docker 镜像,也可以从源码构建所有内容:" + +#: ../../source/tutorials/models/Qwen3-Dense.md:96 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源码安装 `vllm-ascend`,请参考 [安装](../../installation.md)。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:98 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." +msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:100 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-Dense.md:102 +msgid "" +"In this section, we will demonstrate best practices for adjusting " +"hyperparameters in vLLM-Ascend to maximize inference throughput " +"performance. By tailoring service-level configurations to fit different " +"use cases, you can ensure that your system performs optimally across " +"various scenarios. 
We will guide you through how to fine-tune " +"hyperparameters based on observed phenomena, such as max_model_len, " +"max_num_batched_tokens, and cudagraph_capture_sizes, to achieve the best " +"performance." +msgstr "" +"在本节中,我们将演示在 vLLM-Ascend 中调整超参数以实现最大推理吞吐性能的最佳实" +"践。通过定制服务级配置以适应不同的用例,您可以确保您的系统在各种场景下都能达" +"到最佳性能。我们将指导您如何根据观察到的现象(例如 max_model_len、" +"max_num_batched_tokens 和 cudagraph_capture_sizes)来微调超参数,以获得最佳性" +"能。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:104 +msgid "The specific example scenario is as follows:" +msgstr "具体示例如下:" + +#: ../../source/tutorials/models/Qwen3-Dense.md:106 +msgid "The machine environment is an Atlas 800 A3 (64G*16)" +msgstr "机器环境是 Atlas 800 A3 (64G*16)" + +#: ../../source/tutorials/models/Qwen3-Dense.md:107 +msgid "The LLM is Qwen3-32B-W8A8" +msgstr "LLM 是 Qwen3-32B-W8A8" + +#: ../../source/tutorials/models/Qwen3-Dense.md:108 +msgid "The data scenario is a fixed-length input of 3.5K and an output of 1.5K." +msgstr "数据场景是固定长度输入 3.5K 和输出 1.5K。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:109 +msgid "The parallel configuration requirement is DP=1&TP=4" +msgstr "并行配置要求是 DP=1&TP=4" + +#: ../../source/tutorials/models/Qwen3-Dense.md:110 +msgid "" +"If the machine environment is an **Atlas 800I A2(64G*8)**, the deployment" +" approach stays identical." +msgstr "如果机器环境是 **Atlas 800I A2(64G*8)**,部署方法保持不变。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:112 +msgid "Run docker container" +msgstr "运行 docker 容器" + +#: ../../source/tutorials/models/Qwen3-Dense.md:116 +#: ../../source/tutorials/models/Qwen3-Dense.md:192 +#: ../../source/tutorials/models/Qwen3-Dense.md:222 +#: ../../source/tutorials/models/Qwen3-Dense.md:303 +msgid "" +"vllm-ascend/Qwen3-32B-W8A8 is the default model path, replace this with " +"your actual path." +msgstr "vllm-ascend/Qwen3-32B-W8A8 是默认模型路径,请替换为您的实际路径。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:117 +msgid "v0.11.0rc2-a3 is image tag, replace this with your actual tag." +msgstr "v0.11.0rc2-a3 是镜像标签,请替换为您的实际标签。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:118 +msgid "replace this with your actual port: '-p 8113:8113'." +msgstr "请替换为您的实际端口:'-p 8113:8113'。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:119 +msgid "replace this with your actual card: '--device /dev/davinci0'." +msgstr "请替换为您的实际卡:'--device /dev/davinci0'。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:147 +msgid "Online Inference on Multi-NPU" +msgstr "多 NPU 在线推理" + +#: ../../source/tutorials/models/Qwen3-Dense.md:149 +msgid "Run the following script to start the vLLM server on Multi-NPU." +msgstr "运行以下脚本以在多 NPU 上启动 vLLM 服务器。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:151 +msgid "" +"This script is configured to achieve optimal performance under the above " +"specific example scenarios,with batchsize = 72 on two A3 cards." +msgstr "此脚本配置为在上述特定示例场景下实现最佳性能,在两块 A3 卡上 batchsize = 72。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:194 +msgid "" +"If the model is not a quantized model, remove the `--quantization ascend`" +" parameter." +msgstr "如果模型不是量化模型,请移除 `--quantization ascend` 参数。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:196 +#, python-brace-format +msgid "" +"**[Optional]** `--additional-config '{\"pa_shape_list\":[48,64,72,80]}'`:" +" `pa_shape_list` specifies the batch sizes where you want to switch to " +"the PA operator. This is a temporary tuning knob. Currently, the " +"attention operator dispatch defaults to the FIA operator. 
In some batch-" +"size (concurrency) settings, FIA may have suboptimal performance. By " +"setting `pa_shape_list`, when the runtime batch size matches one of the " +"listed values, vLLM-Ascend will replace FIA with the PA operator to " +"prevent performance degradation. In the future, FIA will be optimized for" +" these scenarios and this parameter will be removed." +msgstr "" +"**[可选]** `--additional-config '{\"pa_shape_list\":[48,64,72,80]}'`: " +"`pa_shape_list` 指定了您希望切换到 PA 算子的批次大小。这是一个临时的调优旋" +"钮。目前,注意力算子调度默认使用 FIA 算子。在某些批次大小(并发)设置下,FIA " +"可能性能不佳。通过设置 `pa_shape_list`,当运行时批次大小与列出的值之一匹配时," +"vLLM-Ascend 将用 PA 算子替换 FIA 算子以防止性能下降。未来,FIA 将针对这些场景" +"进行优化,此参数将被移除。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:198 +#, python-brace-format +msgid "" +"If the ultimate performance is desired, the cudagraph_capture_sizes " +"parameter can be enabled, reference: [key-optimization-" +"points](./Qwen3-Dense.md#key-optimization-points)、[optimization-" +"highlights](./Qwen3-Dense.md#optimization-highlights). Here is an example" +" of batchsize of 72: `--compilation-config '{\"cudagraph_mode\": " +"\"FULL_DECODE_ONLY\", " +"\"cudagraph_capture_sizes\":[1,8,24,48,60,64,72,76]}'`." +msgstr "" +"如果需要极致性能,可以启用 cudagraph_capture_sizes 参数,参考:[关键优化" +"点](./Qwen3-Dense.md#key-optimization-points)、[优化亮点](./Qwen3-" +"Dense.md#optimization-highlights)。以下是批次大小为 72 的示例:`--compilation-" +"config '{\"cudagraph_mode\": \"FULL_DECODE_ONLY\", " +"\"cudagraph_capture_sizes\":[1,8,24,48,60,64,72,76]}'`。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:201 +msgid "Once your server is started, you can query the model with input prompts" +msgstr "服务器启动后,您可以使用输入提示词查询模型" + +#: ../../source/tutorials/models/Qwen3-Dense.md:216 +msgid "Offline Inference on Multi-NPU" +msgstr "多 NPU 离线推理" + +#: ../../source/tutorials/models/Qwen3-Dense.md:218 +msgid "Run the following script to execute offline inference on multi-NPU." +msgstr "运行以下脚本以在多 NPU 上执行离线推理。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:224 +msgid "" +"If the model is not a quantized model,remove the " +"`quantization=\"ascend\"` parameter." +msgstr "如果模型不是量化模型,请移除 `quantization=\"ascend\"` 参数。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:265 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen3-Dense.md:267 +msgid "Here is one accuracy evaluation methods." +msgstr "这里是一种精度评估方法。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:269 +#: ../../source/tutorials/models/Qwen3-Dense.md:283 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Qwen3-Dense.md:271 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参阅[使用AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:273 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3" +"-32B-W8A8` in `vllm-ascend:0.11.0rc2` for reference only." 
+msgstr "执行后,您将获得结果。此处展示的是 `Qwen3-32B-W8A8` 在 `vllm-ascend:0.11.0rc2` 环境下的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "task name" +msgstr "任务名称" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "gsm8k" +msgstr "gsm8k" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "gsm8k_gen_0_shot_noncot_chat_prompt" +msgstr "gsm8k_gen_0_shot_noncot_chat_prompt" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "96.44" +msgstr "96.44" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "math500" +msgstr "math500" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "math500_gen_0_shot_cot_chat_prompt" +msgstr "math500_gen_0_shot_cot_chat_prompt" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "97.60" +msgstr "97.60" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "aime" +msgstr "aime" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "aime2024_gen_0_shot_chat_prompt" +msgstr "aime2024_gen_0_shot_chat_prompt" + +#: ../../source/tutorials/models/Qwen3-Dense.md:220 +msgid "76.67" +msgstr "76.67" + +#: ../../source/tutorials/models/Qwen3-Dense.md:281 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3-Dense.md:285 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参阅[使用AISBench进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:287 +msgid "Using vLLM Benchmark" +msgstr "使用vLLM基准测试" + +#: ../../source/tutorials/models/Qwen3-Dense.md:289 +msgid "Run performance evaluation of `Qwen3-32B-W8A8` as an example." +msgstr "以运行 `Qwen3-32B-W8A8` 的性能评估为例。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:291 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "更多详情请参阅[vllm基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:293 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 包含三个子命令:" + +#: ../../source/tutorials/models/Qwen3-Dense.md:295 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:基准测试单批次请求的延迟。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:296 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:基准测试在线服务吞吐量。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:297 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:基准测试离线推理吞吐量。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:299 +msgid "Take the `serve` as an example. Run the code as follows." 
+msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:310 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您将获得性能评估结果。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:312 +msgid "Key Optimization Points" +msgstr "关键优化点" + +#: ../../source/tutorials/models/Qwen3-Dense.md:314 +msgid "" +"In this section, we will cover the key optimization points that can " +"significantly improve the performance of Qwen Dense models. These " +"techniques are designed to enhance throughput and efficiency across " +"various scenarios." +msgstr "本节将介绍能显著提升Qwen Dense模型性能的关键优化点。这些技术旨在提升各种场景下的吞吐量和效率。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:316 +msgid "1. Rope Optimization" +msgstr "1. Rope优化" + +#: ../../source/tutorials/models/Qwen3-Dense.md:318 +msgid "" +"Rope optimization enhances the model's efficiency by modifying the " +"position encoding process. Specifically, it ensures that the " +"cos_sin_cache and the associated index selection operation are only " +"performed during the first layer of the forward pass. For subsequent " +"layers, the position encoding is directly reused, eliminating redundant " +"calculations and significantly speeding up inference in decode phase." +msgstr "Rope优化通过修改位置编码过程来提升模型效率。具体来说,它确保 `cos_sin_cache` 及相关索引选择操作仅在正向传播的第一层执行。对于后续层,位置编码被直接复用,消除了冗余计算,并显著加快了解码阶段的推理速度。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:320 +#: ../../source/tutorials/models/Qwen3-Dense.md:326 +#: ../../source/tutorials/models/Qwen3-Dense.md:354 +msgid "" +"This optimization is enabled by default and does not require any " +"additional environment variables to be set." +msgstr "此优化默认启用,无需设置任何额外的环境变量。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:322 +msgid "2. AddRMSNormQuant Fusion" +msgstr "2. AddRMSNormQuant融合" + +#: ../../source/tutorials/models/Qwen3-Dense.md:324 +msgid "" +"AddRMSNormQuant fusion merges the Address-wise Multi-Scale Normalization " +"and Quantization operations, allowing for more efficient memory access " +"and computation, thereby enhancing throughput." +msgstr "AddRMSNormQuant融合将地址感知多尺度归一化与量化操作合并,实现了更高效的内存访问和计算,从而提升了吞吐量。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:328 +msgid "3. FlashComm_v1" +msgstr "3. FlashComm_v1" + +#: ../../source/tutorials/models/Qwen3-Dense.md:330 +msgid "" +"FlashComm_v1 significantly improves performance in large-batch scenarios " +"by decomposing the traditional allreduce collective communication into " +"reduce-scatter and all-gather. This breakdown helps reduce the " +"computation of the RMSNorm token dimensions, leading to more efficient " +"processing. In quantization scenarios, FlashComm_v1 also reduces the " +"communication overhead by decreasing the bit-level data transfer, which " +"further minimizes the end-to-end latency during the prefill phase." +msgstr "FlashComm_v1通过将传统的allreduce集合通信分解为reduce-scatter和all-gather,显著提升了大批量场景下的性能。这种分解有助于减少RMSNorm令牌维度的计算,从而实现更高效的处理。在量化场景中,FlashComm_v1还通过减少比特级数据传输来降低通信开销,从而进一步最小化预填充阶段的端到端延迟。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:332 +msgid "" +"It is important to note that the decomposition of the allreduce " +"communication into reduce-scatter and all-gather operations only provides" +" benefits in high-concurrency scenarios, where there is no significant " +"communication degradation. In other cases, this decomposition may result " +"in noticeable performance degradation. 
To mitigate this, the current " +"implementation uses a threshold-based approach, where FlashComm_v1 is " +"only enabled if the actual token count for each inference schedule " +"exceeds the threshold. This ensures that the feature is only activated in" +" scenarios where it improves performance, avoiding potential degradation " +"in lower-concurrency situations." +msgstr "需要注意的是,将allreduce通信分解为reduce-scatter和all-gather操作仅在无显著通信降级的高并发场景下有益。在其他情况下,这种分解可能导致明显的性能下降。为缓解此问题,当前实现采用基于阈值的方法,仅当每个推理调度的实际令牌数超过阈值时才启用FlashComm_v1。这确保了该功能仅在能提升性能的场景下激活,避免了在低并发情况下可能出现的性能下降。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:334 +msgid "" +"This optimization requires setting the environment variable " +"`VLLM_ASCEND_ENABLE_FLASHCOMM1 = 1` to be enabled." +msgstr "此优化需要设置环境变量 `VLLM_ASCEND_ENABLE_FLASHCOMM1 = 1` 来启用。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:336 +msgid "4. Matmul and ReduceScatter Fusion" +msgstr "4. 矩阵乘法和ReduceScatter融合" + +#: ../../source/tutorials/models/Qwen3-Dense.md:338 +msgid "" +"Once FlashComm_v1 is enabled, an additional optimization can be applied. " +"This optimization fuses matrix multiplication and ReduceScatter " +"operations, along with tiling optimization. The Matmul computation is " +"treated as one pipeline, while the ReduceScatter and dequant operations " +"are handled in a separate pipeline. This approach significantly reduces " +"communication steps, improves computational efficiency, and allows for " +"better resource utilization, resulting in enhanced throughput, especially" +" in large-scale distributed environments." +msgstr "一旦启用FlashComm_v1,可以应用额外的优化。此优化融合了矩阵乘法和ReduceScatter操作,并包含分片优化。矩阵乘法计算被视为一个流水线,而ReduceScatter和反量化操作则在另一个独立的流水线中处理。这种方法显著减少了通信步骤,提高了计算效率,并实现了更好的资源利用,从而提升了吞吐量,尤其在大规模分布式环境中效果显著。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:340 +msgid "" +"This optimization is automatically enabled once FlashComm_v1 is " +"activated. However, due to an issue with performance degradation in " +"small-concurrency scenarios after this fusion, a threshold-based approach" +" is currently used to mitigate this problem. The optimization is only " +"applied when the token count exceeds the threshold, ensuring that it is " +"not enabled in cases where it could negatively impact performance." +msgstr "此优化在FlashComm_v1激活后会自动启用。然而,由于融合后在小并发场景下存在性能下降的问题,目前采用基于阈值的方法来缓解此问题。该优化仅在令牌数超过阈值时应用,确保在可能对性能产生负面影响的情况下不被启用。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:342 +msgid "5. Weight Prefetching" +msgstr "5. 权重预取" + +#: ../../source/tutorials/models/Qwen3-Dense.md:344 +msgid "" +"Weight prefetching optimizes memory usage by preloading weights into the " +"cache before they are needed, minimizing delays caused by memory access " +"during model execution." +msgstr "权重预取通过在需要之前将权重预加载到缓存中来优化内存使用,从而最小化模型执行期间因内存访问造成的延迟。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:346 +msgid "" +"In dense model scenarios, the MLP's gate_up_proj and down_proj linear " +"layers often exhibit relatively high MTE utilization. To address this, we" +" create a separate pipeline specifically for weight prefetching, which " +"runs in parallel with the original vector computation pipeline, such as " +"RMSNorm and SiLU, before the MLP. This approach allows the weights to be " +"preloaded to L2 cache ahead of time, reducing MTE utilization during the " +"MLP computations and indirectly improving Cube computation efficiency by " +"minimizing resource contention and optimizing data flow." 
+msgstr "在稠密模型场景中,MLP的gate_up_proj和down_proj线性层通常表现出相对较高的MTE利用率。为解决此问题,我们创建了一个专门用于权重预取的独立流水线,该流水线与MLP之前的原始向量计算流水线(如RMSNorm和SiLU)并行运行。这种方法允许权重提前预加载到L2缓存中,从而降低MLP计算期间的MTE利用率,并通过最小化资源争用和优化数据流,间接提升Cube计算效率。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:348 +#, python-brace-format +msgid "" +"Previously, the environment variables VLLM_ASCEND_ENABLE_PREFETCH_MLP " +"used to enable MLP weight prefetch and " +"VLLM_ASCEND_MLP_GATE_UP_PREFETCH_SIZE and " +"VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE used to set the weight prefetch size " +"for MLP gate_up_proj and down_proj were deprecated. Please use the " +"following configuration instead: \"weight_prefetch_config\": { " +"\"enabled\": true, \"prefetch_ratio\": { \"mlp\": { \"gate_up\": 1.0, " +"\"down\": 1.0}}}. See User Guide->Feature Guide->Weight Prefetch Guide " +"for details." +msgstr "之前用于启用MLP权重预取的环境变量 `VLLM_ASCEND_ENABLE_PREFETCH_MLP`,以及用于设置MLP gate_up_proj和down_proj权重预取大小的 `VLLM_ASCEND_MLP_GATE_UP_PREFETCH_SIZE` 和 `VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE` 已被弃用。请改用以下配置:`\"weight_prefetch_config\": { \"enabled\": true, \"prefetch_ratio\": { \"mlp\": { \"gate_up\": 1.0, \"down\": 1.0}}}`。详情请参阅用户指南->功能指南->权重预取指南。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:350 +msgid "6. Zerolike Elimination" +msgstr "6. Zerolike消除" + +#: ../../source/tutorials/models/Qwen3-Dense.md:352 +msgid "" +"This elimination removes unnecessary operations related to zero-like " +"tensors in Attention forward, improving the efficiency of matrix " +"operations and reducing memory usage." +msgstr "此消除操作移除了Attention前向传播中与类零张量相关的不必要操作,提高了矩阵运算效率并减少了内存使用。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:356 +msgid "7. FullGraph Optimization" +msgstr "7. 全图优化" + +#: ../../source/tutorials/models/Qwen3-Dense.md:358 +msgid "" +"ACLGraph offers several key optimizations to improve model execution " +"efficiency. By replaying the entire model execution graph at once, we " +"significantly reduce dispatch latency compared to multiple smaller " +"replays. This approach also stabilizes multi-device performance, as " +"capturing the model as a single static graph mitigates dispatch " +"fluctuations across devices. Additionally, consolidating graph captures " +"frees up streams, allowing for the capture of more graphs and optimizing " +"resource usage, ultimately leading to improved system efficiency and " +"reduced overhead." +msgstr "ACLGraph提供了多项关键优化以提升模型执行效率。通过一次性重放整个模型执行图,与多次重放较小图相比,我们显著降低了调度延迟。这种方法还能稳定多设备性能,因为将模型捕获为单个静态图可以缓解跨设备的调度波动。此外,整合图捕获可以释放流,从而允许捕获更多图并优化资源使用,最终提高系统效率并减少开销。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:360 +#, python-brace-format +msgid "" +"The configuration compilation_config = { \"cudagraph_mode\": " +"\"FULL_DECODE_ONLY\"} is used when starting the service. This setup is " +"necessary to enable the aclgraph's full decode-only mode." +msgstr "启动服务时使用配置 `compilation_config = { \"cudagraph_mode\": \"FULL_DECODE_ONLY\"}`。此设置对于启用aclgraph的完全仅解码模式是必需的。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:362 +msgid "8. Asynchronous Scheduling" +msgstr "8. 异步调度" + +#: ../../source/tutorials/models/Qwen3-Dense.md:364 +msgid "" +"Asynchronous scheduling is a technique used to optimize inference " +"efficiency. It allows non-blocking task scheduling to improve concurrency" +" and throughput, especially when processing large-scale models." +msgstr "异步调度是一种用于优化推理效率的技术。它允许非阻塞的任务调度,以提高并发性和吞吐量,尤其是在处理大规模模型时。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:366 +msgid "This optimization is enabled by setting `--async-scheduling`." 
+msgstr "此优化通过设置 `--async-scheduling` 来启用。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:368 +msgid "Optimization Highlights" +msgstr "优化亮点" + +#: ../../source/tutorials/models/Qwen3-Dense.md:370 +msgid "" +"Building on the specific example scenarios outlined earlier, this section" +" highlights the key tuning points that played a crucial role in achieving" +" optimal performance. By focusing on the most impactful adjustments to " +"hyperparameters and optimizations, we’ll emphasize the strategies that " +"can be leveraged to maximize throughput, minimize latency, and ensure " +"efficient resource utilization in various environments. These insights " +"will help guide you in fine-tuning your own configurations for the best " +"possible results." +msgstr "基于前面概述的具体示例场景,本节重点介绍在实现最佳性能中起关键作用的关键调优点。通过关注对超参数和优化最具影响力的调整,我们将强调可用于最大化吞吐量、最小化延迟并确保在各种环境中高效利用资源的策略。这些见解将帮助指导您微调自己的配置,以获得最佳结果。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:372 +msgid "1.Prefetch Buffer Size" +msgstr "1. 预取缓冲区大小" + +#: ../../source/tutorials/models/Qwen3-Dense.md:374 +msgid "" +"Setting the right prefetch buffer size is essential for optimizing weight" +" loading and the size of this prefetch buffer is directly related to the " +"time that can be hidden by vector computations. To achieve a near-perfect" +" overlap between the prefetch and computation streams, you can flexibly " +"adjust the buffer size by profiling and observing the degree of overlap " +"at different buffer sizes." +msgstr "设置正确的预取缓冲区大小对于优化权重加载至关重要,且此预取缓冲区的大小与向量计算可隐藏的时间直接相关。为了实现预取流与计算流近乎完美的重叠,您可以通过性能分析和观察不同缓冲区大小下的重叠程度来灵活调整缓冲区大小。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:376 +msgid "" +"For example, in the real-world scenario mentioned above, I set the " +"prefetch buffer size for the gate_up_proj and down_proj in the MLP to " +"18MB. The reason for this is that, at this value, the vector computations" +" of RMSNorm and SiLU can effectively hide the prefetch stream, thereby " +"accelerating the Matmul computations of the two linear layers." +msgstr "" +"例如,在上述实际场景中,我将MLP中gate_up_proj和down_proj的预取缓冲区大小设置为18MB。" +"这样做的原因是,在此数值下,RMSNorm和SiLU的向量计算能够有效隐藏预取流,从而加速两个线性层的Matmul计算。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:378 +msgid "2.Max-num-batched-tokens" +msgstr "2.最大批处理令牌数" + +#: ../../source/tutorials/models/Qwen3-Dense.md:380 +msgid "" +"The max-num-batched-tokens parameter determines the maximum number of " +"tokens that can be processed in a single batch. Adjusting this value " +"helps to balance throughput and memory usage. Setting this value too " +"small can negatively impact end-to-end performance, as fewer tokens are " +"processed per batch, potentially leading to inefficiencies. Conversely, " +"setting it too large increases the risk of Out of Memory (OOM) errors due" +" to excessive memory consumption." +msgstr "" +"最大批处理令牌数参数决定了单批次可处理的令牌数量上限。调整此值有助于平衡吞吐量与内存使用。" +"若设置过小,每批次处理的令牌数较少,可能降低效率,从而对端到端性能产生负面影响。" +"反之,若设置过大,则会因内存消耗过高而增加内存溢出(OOM)错误的风险。" + +#: ../../source/tutorials/models/Qwen3-Dense.md:382 +msgid "" +"In the above real-world scenario, we not only conducted extensive testing" +" to determine the most cost-effective value, but also took into account " +"the accumulation of decode tokens when enabling chunked prefill. 
If the "
+"value is set too small, a single request may be chunked multiple times, "
+"and in the early stage of inference, a batch may contain only a small "
+"number of decode tokens. This may cause the end-to-end throughput to fall "
+"short of expectations."
+msgstr ""
+"在上述实际场景中,我们不仅通过大量测试确定了最具性价比的数值,还考虑了启用分块预填充时解码令牌的累积问题。"
+"若该值设置过小,单个请求可能被多次分块处理,且在推理早期阶段,单个批次可能仅包含少量解码令牌,从而导致端到端吞吐量无法达到预期。"
+
+#: ../../source/tutorials/models/Qwen3-Dense.md:384
+msgid "3.Cudagraph_capture_sizes"
+msgstr "3.CUDA图捕获尺寸"
+
+#: ../../source/tutorials/models/Qwen3-Dense.md:386
+msgid ""
+"The cudagraph_capture_sizes parameter controls the granularity of graph "
+"captures during the inference process. Adjusting this value determines "
+"how much of the computation graph is captured at once, which can "
+"significantly impact both performance and memory usage."
+msgstr ""
+"CUDA图捕获尺寸参数控制推理过程中图捕获的粒度。调整此值决定了单次捕获的计算图范围,这对性能和内存使用均有显著影响。"
+
+#: ../../source/tutorials/models/Qwen3-Dense.md:388
+msgid ""
+"If this list is not manually specified, it will be filled with a series "
+"of evenly distributed values, which typically ensures good performance. "
+"However, if you want to fine-tune it further, manually specifying the "
+"values will yield better results. This is because if the batch size falls"
+" between two sizes, the framework will automatically pad the token count "
+"to the larger size. This often leads to actual performance deviating from"
+" the expected or even degrading."
+msgstr ""
+"若未手动指定此列表,系统将自动填充一系列均匀分布的值,这通常能保证良好性能。"
+"但若需进一步微调,手动指定数值将获得更佳效果。这是因为当批次大小介于两个尺寸之间时,框架会自动将令牌数填充至较大尺寸,这常导致实际性能偏离预期甚至下降。"
+
+#: ../../source/tutorials/models/Qwen3-Dense.md:390
+msgid ""
+"Therefore, like the above real-world scenario, when adjusting the "
+"benchmark request concurrency, we always ensure that the concurrency is "
+"actually included in the cudagraph_capture_sizes list. This way, during "
+"the decode phase, padding operations are essentially avoided, ensuring "
+"the reliability of the experimental data."
+msgstr ""
+"因此,如上述实际场景所示,在调整基准测试请求并发度时,我们始终确保并发度实际包含在CUDA图捕获尺寸列表中。"
+"这样在解码阶段基本避免了填充操作,从而保证了实验数据的可靠性。"
+
+#: ../../source/tutorials/models/Qwen3-Dense.md:392
+msgid ""
+"It's important to note that if you enable FlashComm_v1, the values in "
+"this list must be integer multiples of the TP size. Any values that do "
+"not meet this condition will be automatically filtered out. Therefore, I "
+"recommend incrementally adding concurrency based on the TP size after "
+"enabling FlashComm_v1."
+msgstr ""
+"需特别注意,若启用FlashComm_v1,此列表中的值必须是TP尺寸的整数倍。不满足此条件的任何值都将被自动过滤。"
+"因此,建议在启用FlashComm_v1后,基于TP尺寸逐步增加并发度。"
 diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Next.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Next.po new file mode 100644 index 00000000..6b03f128 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Next.po @@ -0,0 +1,269 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026.
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-Next.md:1 +msgid "Qwen3-Next" +msgstr "Qwen3-Next" + +#: ../../source/tutorials/models/Qwen3-Next.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-Next.md:5 +msgid "" +"The Qwen3-Next model is a sparse MoE (Mixture of Experts) model with high" +" sparsity. Compared to the MoE architecture of Qwen3, it has introduced " +"key improvements in aspects such as the hybrid attention mechanism and " +"multi-token prediction mechanism, enhancing the training and inference " +"efficiency of the model under long contexts and large total parameter " +"scales." +msgstr "" +"Qwen3-Next 模型是一个具有高稀疏性的稀疏 MoE(专家混合)模型。与 Qwen3 的 MoE 架构相比,它在混合注意力机制和多令牌预测机制等方面引入了关键改进,提升了模型在长上下文和大总参数量规模下的训练和推理效率。" + +#: ../../source/tutorials/models/Qwen3-Next.md:7 +msgid "" +"This document will present the core verification steps of the model, " +"including supported features, environment preparation, as well as " +"accuracy and performance evaluation. Qwen3 Next is currently using Triton" +" Ascend, which is in the experimental phase. In subsequent versions, its " +"performance related to stability and accuracy may change, and performance" +" will be continuously optimized." +msgstr "" +"本文档将介绍该模型的核心验证步骤,包括支持的功能、环境准备以及精度和性能评估。Qwen3 Next 目前使用处于实验阶段的 Triton Ascend。在后续版本中,其与稳定性和精度相关的表现可能会发生变化,性能将持续优化。" + +#: ../../source/tutorials/models/Qwen3-Next.md:9 +msgid "The `Qwen3-Next` model is first supported in `vllm-ascend:v0.10.2rc1`." +msgstr "`Qwen3-Next` 模型首次在 `vllm-ascend:v0.10.2rc1` 中得到支持。" + +#: ../../source/tutorials/models/Qwen3-Next.md:11 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3-Next.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen3-Next.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/Qwen3-Next.md:17 +msgid "Weight Preparation" +msgstr "权重准备" + +#: ../../source/tutorials/models/Qwen3-Next.md:19 +msgid "" +"Download Link for the `Qwen3-Next-80B-A3B-Instruct` Model Weights: " +"[Download model weight](https://modelscope.cn/models/Qwen/Qwen3-Next-80B-" +"A3B-Instruct)" +msgstr "`Qwen3-Next-80B-A3B-Instruct` 模型权重下载链接:[下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-Next-80B-A3B-Instruct)" + +#: ../../source/tutorials/models/Qwen3-Next.md:21 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-Next.md:23 +msgid "" +"If the machine environment is an Atlas 800I A3(64G*16), the deployment " +"approach stays identical." 
+msgstr "如果机器环境是 Atlas 800I A3(64G*16),部署方法保持不变。" + +#: ../../source/tutorials/models/Qwen3-Next.md:25 +msgid "Run docker container" +msgstr "运行 Docker 容器" + +#: ../../source/tutorials/models/Qwen3-Next.md:54 +msgid "" +"The Qwen3 Next is using [Triton Ascend](https://gitee.com/ascend/triton-" +"ascend) which is currently experimental. In future versions, there may be" +" behavioral changes related to stability, accuracy, and performance " +"improvement." +msgstr "Qwen3 Next 正在使用目前处于实验阶段的 [Triton Ascend](https://gitee.com/ascend/triton-ascend)。在未来的版本中,可能会有与稳定性、精度和性能改进相关的行为变化。" + +#: ../../source/tutorials/models/Qwen3-Next.md:56 +msgid "Inference" +msgstr "推理" + +#: ../../source/tutorials/models/Qwen3-Next.md +msgid "Online Inference" +msgstr "在线推理" + +#: ../../source/tutorials/models/Qwen3-Next.md:62 +msgid "Run the following script to start the vLLM server on multi-NPU:" +msgstr "运行以下脚本在多 NPU 上启动 vLLM 服务器:" + +#: ../../source/tutorials/models/Qwen3-Next.md:68 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "服务器启动后,您可以使用输入提示词查询模型。" + +#: ../../source/tutorials/models/Qwen3-Next.md +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/tutorials/models/Qwen3-Next.md:87 +msgid "Run the following script to execute offline inference on multi-NPU:" +msgstr "运行以下脚本在多 NPU 上执行离线推理:" + +#: ../../source/tutorials/models/Qwen3-Next.md:125 +msgid "If you run this script successfully, you can see the info shown below:" +msgstr "如果成功运行此脚本,您将看到如下信息:" + +#: ../../source/tutorials/models/Qwen3-Next.md:133 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen3-Next.md:135 +#: ../../source/tutorials/models/Qwen3-Next.md:147 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Qwen3-Next.md:137 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen3-Next.md:139 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3" +"-Next-80B-A3B-Instruct` in `vllm-ascend:0.13.0rc1` for reference only." +msgstr "执行后,您可以获得结果,以下是 `vllm-ascend:0.13.0rc1` 中 `Qwen3-Next-80B-A3B-Instruct` 的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "gsm8k" +msgstr "gsm8k" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/Qwen3-Next.md:85 +msgid "95.53" +msgstr "95.53" + +#: ../../source/tutorials/models/Qwen3-Next.md:145 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3-Next.md:149 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." 
+msgstr "详情请参考[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Qwen3-Next.md:151 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/Qwen3-Next.md:153 +msgid "Run performance evaluation of `Qwen3-Next` as an example." +msgstr "以运行 `Qwen3-Next` 的性能评估为例。" + +#: ../../source/tutorials/models/Qwen3-Next.md:155 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "更多详情请参考 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/Qwen3-Next.md:157 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/Qwen3-Next.md:159 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-Next.md:160 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-Next.md:161 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-Next.md:163 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3-Next.md:170 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您将获得性能评估结果。" + +#: ../../source/tutorials/models/Qwen3-Next.md:172 +msgid "The performance result is:" +msgstr "性能结果如下:" + +#: ../../source/tutorials/models/Qwen3-Next.md:174 +msgid "**Hardware**: A3-752T, 2 node" +msgstr "**硬件**:A3-752T,2 节点" + +#: ../../source/tutorials/models/Qwen3-Next.md:176 +msgid "**Deployment**: TP4 + Full Decode Only" +msgstr "**部署**:TP4 + 仅全解码" + +#: ../../source/tutorials/models/Qwen3-Next.md:178 +msgid "**Input/Output**: 2k/2k" +msgstr "**输入/输出**:2k/2k" + +#: ../../source/tutorials/models/Qwen3-Next.md:180 +msgid "**Concurrency**: 32" +msgstr "**并发数**:32" + +#: ../../source/tutorials/models/Qwen3-Next.md:182 +msgid "**Performance**: 580tps, TPOT 54ms" +msgstr "**性能**:580tps,TPOT 54ms" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po new file mode 100644 index 00000000..7d0240d5 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po @@ -0,0 +1,230 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:1 +msgid "Qwen3-Omni-30B-A3B-Thinking" +msgstr "Qwen3-Omni-30B-A3B-Thinking" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:5 +msgid "" +"Qwen3-Omni is the natively end-to-end multilingual omni-modal foundation " +"models. It processes text, images, audio, and video, and delivers real-" +"time streaming responses in both text and natural speech. We introduce " +"several architectural upgrades to improve performance and efficiency. The" +" Thinking model of Qwen3-Omni-30B-A3B, containing the thinker component, " +"equipped with chain-of-thought reasoning, supporting audio, video, and " +"text input, with text output." +msgstr "" +"Qwen3-Omni 是原生端到端多语言全模态基础模型。它能处理文本、图像、音频和视频,并以文本和自然语音形式提供实时流式响应。我们引入了多项架构升级以提升性能和效率。Qwen3-Omni-30B-A3B 的 Thinking 模型包含思考器组件,具备思维链推理能力,支持音频、视频和文本输入,输出为文本。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node deployment, accuracy and performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的功能、功能配置、环境准备、单节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:9 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:11 +msgid "" +"Refer to [supported features](https://docs.vllm.ai/projects/ascend/zh-" +"cn/latest/user_guide/support_matrix/supported_models.html) to get the " +"model's supported feature matrix." +msgstr "请参考 [支持的功能](https://docs.vllm.ai/projects/ascend/zh-cn/latest/user_guide/support_matrix/supported_models.html) 以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:13 +msgid "" +"Refer to [feature guide](https://docs.vllm.ai/projects/ascend/zh-" +"cn/latest/user_guide/feature_guide/index.html) to get the feature's " +"configuration." 
+msgstr "请参考 [功能指南](https://docs.vllm.ai/projects/ascend/zh-cn/latest/user_guide/feature_guide/index.html) 以获取功能的配置信息。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:15 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:17 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:19 +msgid "" +"`Qwen3-Omni-30B-A3B-Thinking` requires 2 NPU Cards(64G × 2).[Download " +"model weight](https://modelscope.cn/models/Qwen/Qwen3-Omni-30B-A3B-" +"Thinking) It is recommended to download the model weight to the shared " +"directory of multiple nodes, such as `/root/.cache/`" +msgstr "" +"`Qwen3-Omni-30B-A3B-Thinking` 需要 2 张 NPU 卡 (64G × 2)。[下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-Omni-30B-A3B-Thinking)。建议将模型权重下载到多节点的共享目录,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:22 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md +msgid "Use docker image" +msgstr "使用 Docker 镜像" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:28 +msgid "" +"You can use our official docker image to run Qwen3-Omni-30B-A3B-Thinking " +"directly" +msgstr "您可以使用我们的官方 Docker 镜像直接运行 Qwen3-Omni-30B-A3B-Thinking" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:30 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考 [使用 Docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md +msgid "Build from source" +msgstr "从源码构建" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:65 +msgid "You can build all from source." +msgstr "您可以从源码构建所有组件。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:67 +msgid "" +"Install `vllm-ascend`, refer to [set up using " +"python](../../installation.md#set-up-using-python)." +msgstr "安装 `vllm-ascend`,请参考 [使用 Python 设置](../../installation.md#set-up-using-python)。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:71 +msgid "Please install system dependencies" +msgstr "请安装系统依赖" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:81 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:83 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:85 +msgid "Offline Inference on Multi-NPU" +msgstr "多 NPU 离线推理" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:87 +msgid "Run the following script to execute offline inference on multi-NPU:" +msgstr "运行以下脚本在多 NPU 上执行离线推理:" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:174 +msgid "Online Inference on Multi-NPU" +msgstr "多 NPU 在线推理" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:176 +msgid "" +"Run the following script to start the vLLM server on Multi-NPU: For an " +"Atlas A2 with 64 GB of NPU card memory, tensor-parallel-size should be at" +" least 1, and for 32 GB of memory, tensor-parallel-size should be at " +"least 2." 
+msgstr "运行以下脚本在多 NPU 上启动 vLLM 服务器:对于具有 64 GB NPU 卡内存的 Atlas A2,tensor-parallel-size 应至少为 1;对于 32 GB 内存,tensor-parallel-size 应至少为 2。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:188 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:190 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "服务器启动后,您可以使用输入提示词查询模型。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:231 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:233 +msgid "Here are accuracy evaluation methods." +msgstr "以下是精度评估方法。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:235 +msgid "Using EvalScope" +msgstr "使用 EvalScope" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:237 +msgid "" +"As an example, take the `gsm8k` `omnibench` `bbh` dataset as a test " +"dataset, and run accuracy evaluation of `Qwen3-Omni-30B-A3B-Thinking` in " +"online mode." +msgstr "以 `gsm8k`、`omnibench`、`bbh` 数据集作为测试数据集为例,在在线模式下运行 `Qwen3-Omni-30B-A3B-Thinking` 的精度评估。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:239 +msgid "" +"Refer to Using " +"evalscope() for `evalscope`installation." +msgstr "关于 `evalscope` 的安装,请参考使用 evalscope ()。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:240 +msgid "Run `evalscope` to execute the accuracy evaluation." +msgstr "运行 `evalscope` 以执行精度评估。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:255 +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:296 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3" +"-Omni-30B-A3B-Thinking` in vllm-ascend:0.13.0rc1 for reference only." +msgstr "执行后,您可以获得结果。以下是 `Qwen3-Omni-30B-A3B-Thinking` 在 vllm-ascend:0.13.0rc1 中的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:269 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:271 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM 基准测试" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:273 +msgid "" +"Run performance evaluation of `Qwen3-Omni-30B-A3B-Thinking` as an " +"example. Refer to vllm benchmark for more details. Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/benchmarking/) for more " +"details." +msgstr "以运行 `Qwen3-Omni-30B-A3B-Thinking` 的性能评估为例。更多详情请参考 vllm 基准测试。更多详情请参考 [vllm 基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:277 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:279 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批次请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:280 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:281 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:283 +msgid "Take the `serve` as an example. Run the code as follows." 
+msgstr "以 `serve` 为例。按如下方式运行代码。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-235B-A22B-Instruct.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-235B-A22B-Instruct.po new file mode 100644 index 00000000..f7e27e53 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-235B-A22B-Instruct.po @@ -0,0 +1,433 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:1 +msgid "Qwen3-VL-235B-A22B-Instruct" +msgstr "Qwen3-VL-235B-A22B-Instruct" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:5 +msgid "" +"The Qwen-VL(Vision-Language)series from Alibaba Cloud comprises a family " +"of powerful Large Vision-Language Models (LVLMs) designed for " +"comprehensive multimodal understanding. They accept images, text, and " +"bounding boxes as input, and output text and detection boxes, enabling " +"advanced functions like image detection, multi-modal dialogue, and multi-" +"image reasoning." +msgstr "" +"阿里云的Qwen-VL(视觉-语言)系列包含一系列强大的大型视觉语言模型(LVLM),专为全面的多模态理解而设计。它们接受图像、文本和边界框作为输入,并输出文本和检测框,从而实现图像检测、多模态对话和多图像推理等高级功能。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, NPU deployment, accuracy and performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的功能、功能配置、环境准备、NPU部署、精度和性能评估。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:9 +msgid "" +"This tutorial uses the vLLM-Ascend `v0.11.0rc2` version for " +"demonstration, showcasing the `Qwen3-VL-235B-A22B-Instruct` model as an " +"example for multi-NPU deployment." +msgstr "本教程使用 vLLM-Ascend `v0.11.0rc2` 版本进行演示,以 `Qwen3-VL-235B-A22B-Instruct` 模型为例展示多NPU部署。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:11 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." 
+msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:21 +msgid "" +"`Qwen3-VL-235B-A22B-Instruct`(BF16 version): require 1 Atlas 800 A3 (64G " +"× 16) node,2 Atlas 800 A2(64G × 8)nodes. [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen3-VL-235B-A22B-Instruct/)" +msgstr "" +"`Qwen3-VL-235B-A22B-Instruct`(BF16版本):需要1个Atlas 800 A3(64G × 16)节点,2个Atlas 800 A2(64G × 8)节点。[下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-VL-235B-A22B-Instruct/)" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:23 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:25 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:27 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:29 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md +msgid "Use docker image" +msgstr "使用Docker镜像" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:35 +msgid "" +"For example, using images `quay.io/ascend/vllm-ascend:v0.11.0rc2`(for " +"Atlas 800 A2) and `quay.io/ascend/vllm-ascend:v0.11.0rc2-a3`(for Atlas " +"800 A3)." +msgstr "例如,使用镜像 `quay.io/ascend/vllm-ascend:v0.11.0rc2`(适用于Atlas 800 A2)和 `quay.io/ascend/vllm-ascend:v0.11.0rc2-a3`(适用于Atlas 800 A3)。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:37 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像并在节点上启动Docker镜像,请参考[使用Docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md +msgid "Build from source" +msgstr "从源码构建" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:78 +msgid "You can build all from source." +msgstr "您可以从源码构建所有组件。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:80 +msgid "" +"Install `vllm-ascend`, refer to [set up using " +"python](../../installation.md#set-up-using-python)." +msgstr "安装 `vllm-ascend`,请参考[使用Python设置](../../installation.md#set-up-using-python)。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:84 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." 
+msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:86 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:88 +msgid "Multi-node Deployment with MP (Recommended)" +msgstr "使用MP进行多节点部署(推荐)" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:90 +msgid "" +"Assume you have Atlas 800 A3 (64G*16) nodes (or 2* A2), and want to " +"deploy the `Qwen3-VL-235B-A22B-Instruct` model across multiple nodes." +msgstr "假设您拥有Atlas 800 A3(64G*16)节点(或2个A2节点),并希望跨多个节点部署 `Qwen3-VL-235B-A22B-Instruct` 模型。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:92 +msgid "Node 0" +msgstr "节点 0" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:135 +msgid "Node1" +msgstr "节点 1" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:182 +msgid "The parameters are explained as follows:" +msgstr "参数解释如下:" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:184 +msgid "" +"`--max-model-len` represents the context length, which is the maximum " +"value of the input plus output for a single request." +msgstr "`--max-model-len` 表示上下文长度,即单个请求的输入加输出的最大值。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:185 +msgid "" +"`--max-num-seqs` indicates the maximum number of requests that each DP " +"group is allowed to process. If the number of requests sent to the " +"service exceeds this limit, the excess requests will remain in a waiting " +"state and will not be scheduled. Note that the time spent in the waiting " +"state is also counted in metrics such as TTFT and TPOT. Therefore, when " +"testing performance, it is generally recommended that `--max-num-seqs` * " +"`--data-parallel-size` >= the actual total concurrency." +msgstr "" +"`--max-num-seqs` 表示每个DP组允许处理的最大请求数。如果发送到服务的请求数超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,等待状态所花费的时间也会计入TTFT和TPOT等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:186 +msgid "" +"`--max-num-batched-tokens` represents the maximum number of tokens that " +"the model can process in a single step. Currently, vLLM v1 scheduling " +"enables ChunkPrefill/SplitFuse by default, which means:" +msgstr "`--max-num-batched-tokens` 表示模型在单步中可以处理的最大token数。目前,vLLM v1调度默认启用ChunkPrefill/SplitFuse,这意味着:" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:187 +msgid "" +"(1) If the input length of a request is greater than `--max-num-batched-" +"tokens`, it will be divided into multiple rounds of computation according" +" to `--max-num-batched-tokens`;" +msgstr "(1)如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:188 +msgid "" +"(2) Decode requests are prioritized for scheduling, and prefill requests " +"are scheduled only if there is available capacity." +msgstr "(2)解码请求优先被调度,而预填充请求仅在有空闲容量时才会被调度。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:189 +msgid "" +"Generally, if `--max-num-batched-tokens` is set to a larger value, the " +"overall latency will be lower, but the pressure on GPU memory (activation" +" value usage) will be greater." +msgstr "通常,如果将 `--max-num-batched-tokens` 设置为较大的值,整体延迟会更低,但GPU内存(激活值使用)的压力会更大。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:190 +msgid "" +"`--gpu-memory-utilization` represents the proportion of HBM that vLLM " +"will use for actual inference. 
Its essential function is to calculate the" +" available kv_cache size. During the warm-up phase (referred to as " +"profile run in vLLM), vLLM records the peak GPU memory usage during an " +"inference process with an input size of `--max-num-batched-tokens`. The " +"available kv_cache size is then calculated as: `--gpu-memory-utilization`" +" * HBM size - peak GPU memory usage. Therefore, the larger the value of " +"`--gpu-memory-utilization`, the more kv_cache can be used. However, since" +" the GPU memory usage during the warm-up phase may differ from that " +"during actual inference (e.g., due to uneven EP load), setting `--gpu-" +"memory-utilization` too high may lead to OOM (Out of Memory) issues " +"during actual inference. The default value is `0.9`." +msgstr "" +"`--gpu-memory-utilization` 表示vLLM将用于实际推理的HBM比例。其主要功能是计算可用的kv_cache大小。在预热阶段(在vLLM中称为profile run),vLLM会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值GPU内存使用量。然后,可用的kv_cache大小计算为:`--gpu-memory-utilization` * HBM大小 - 峰值GPU内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的kv_cache就越多。然而,由于预热阶段的GPU内存使用量可能与实际推理阶段不同(例如,由于EP负载不均衡),将 `--gpu-memory-utilization` 设置得过高可能导致实际推理时出现OOM(内存不足)问题。默认值为 `0.9`。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:191 +msgid "" +"`--enable-expert-parallel` indicates that EP is enabled. Note that vLLM " +"does not support a mixed approach of ETP and EP; that is, MoE can either " +"use pure EP or pure TP." +msgstr "`--enable-expert-parallel` 表示启用了EP。请注意,vLLM不支持ETP和EP的混合方法;也就是说,MoE只能使用纯EP或纯TP。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:192 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. " +"To enable it, remove this option." +msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,请移除此选项。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:193 +msgid "" +"`--quantization` \"ascend\" indicates that quantization is used. To " +"disable quantization, remove this option." +msgstr "`--quantization` \"ascend\" 表示使用了量化。要禁用量化,请移除此选项。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:194 +msgid "" +"`--compilation-config` contains configurations related to the aclgraph " +"graph mode. The most significant configurations are \"cudagraph_mode\" " +"and \"cudagraph_capture_sizes\", which have the following meanings: " +"\"cudagraph_mode\": represents the specific graph mode. Currently, " +"\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " +"mainly used to reduce the cost of operator dispatch. Currently, " +"\"FULL_DECODE_ONLY\" is recommended." +msgstr "" +"`--compilation-config` 包含与aclgraph图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:196 +msgid "" +"\"cudagraph_capture_sizes\": represents different levels of graph modes. " +"The default value is [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]. " +"In the graph mode, the input for graphs at different levels is fixed, and" +" inputs between levels are automatically padded to the next level. " +"Currently, the default setting is recommended. Only in some scenarios is " +"it necessary to set this separately to achieve optimal performance." 
+msgstr "" +"\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一个级别。目前推荐使用默认设置。仅在部分场景中需要单独设置此参数以达到最佳性能。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:197 +msgid "" +"`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` indicates that Flashcomm1 " +"optimization is enabled. Currently, this optimization is only supported " +"for MoE in scenarios where tp_size > 1." +msgstr "`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` 表示启用了Flashcomm1优化。目前,此优化仅在 tp_size > 1 的场景中支持MoE。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:199 +msgid "" +"If the service starts successfully, the following information will be " +"displayed on node 0:" +msgstr "如果服务启动成功,节点0上将显示以下信息:" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:210 +msgid "Multi-node Deployment with Ray" +msgstr "使用Ray进行多节点部署" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:212 +msgid "refer to [Ray Distributed (Qwen/Qwen3-235B-A22B)](../features/ray.md)." +msgstr "请参考[Ray分布式(Qwen/Qwen3-235B-A22B)](../features/ray.md)。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:214 +msgid "Prefill-Decode Disaggregation" +msgstr "预填充-解码解耦" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:216 +msgid "" +"refer to [Prefill-Decode Disaggregation Mooncake " +"Verification](../features/pd_disaggregation_mooncake_multi_node.md)" +msgstr "请参考[预填充-解码解耦月饼验证](../features/pd_disaggregation_mooncake_multi_node.md)" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:218 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:220 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "一旦您的服务器启动,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:237 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:239 +msgid "Here are two accuracy evaluation methods." +msgstr "这里有两种精度评估方法。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:241 +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:253 +msgid "Using AISBench" +msgstr "使用AISBench" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:243 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:245 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3-VL-" +"235B-A22B-Instruct` in `vllm-ascend:0.11.0rc2` for reference only." 
+msgstr "执行后,您可以获得结果,以下是 `Qwen3-VL-235B-A22B-Instruct` 在 `vllm-ascend:0.11.0rc2` 中的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "aime2024" +msgstr "aime2024" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:76 +msgid "93" +msgstr "93" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:251 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:255 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "" +"详情请参阅[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:257 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:259 +msgid "Run performance evaluation of `Qwen3-VL-235B-A22B-Instruct` as an example." +msgstr "以运行 `Qwen3-VL-235B-A22B-Instruct` 的性能评估为例。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:261 +msgid "" +"Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " +"for more details." +msgstr "" +"更多详情请参阅 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:263 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 包含三个子命令:" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:265 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批次请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:266 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:267 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:269 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例,按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md:276 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." 
+msgstr "大约几分钟后,您将获得性能评估结果。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-30B-A3B-Instruct.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-30B-A3B-Instruct.po new file mode 100644 index 00000000..925d3fd4 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-30B-A3B-Instruct.po @@ -0,0 +1,199 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:1 +msgid "Qwen3-VL-30B-A3B-Instruct" +msgstr "Qwen3-VL-30B-A3B-Instruct" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:5 +msgid "" +"The Qwen-VL (Vision-Language) series from Alibaba Cloud comprises a " +"family of powerful Large Vision-Language Models (LVLMs) designed for " +"comprehensive multimodal understanding. They accept images, text, and " +"bounding boxes as input, and output text and detection boxes, enabling " +"advanced functions like image detection, multi-modal dialogue, and multi-" +"image reasoning." +msgstr "" +"阿里云的 Qwen-VL(视觉-语言)系列包含一系列强大的大型视觉语言模型(LVLM),专为全面的多模态理解而设计。它们接受图像、文本和边界框作为输入,并输出文本和检测框,从而实现图像检测、多模态对话和多图像推理等高级功能。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:7 +msgid "" +"This document will show the main verification steps of the `Qwen3-VL-30B-" +"A3B-Instruct`." +msgstr "本文档将展示 `Qwen3-VL-30B-A3B-Instruct` 的主要验证步骤。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:9 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:11 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:12 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:14 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:16 +msgid "Prepare Model Weights" +msgstr "准备模型权重" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:18 +msgid "" +"Running this model requires 1 Atlas 800I A2 (64G × 8) node or 1 Atlas 800" +" A3 (64G × 16) node." 
+msgstr "运行此模型需要 1 个 Atlas 800I A2 (64G × 8) 节点或 1 个 Atlas 800 A3 (64G × 16) 节点。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:20 +msgid "" +"Download model weight at [ModelScope " +"Website](https://modelscope.cn/models/Qwen/Qwen3-VL-30B-A3B-Instruct) or " +"download by below command:" +msgstr "从 [ModelScope 网站](https://modelscope.cn/models/Qwen/Qwen3-VL-30B-A3B-Instruct) 下载模型权重,或使用以下命令下载:" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:27 +msgid "" +"It is recommended to download the model weights to the shared directory " +"of multiple nodes, such as `/root/.cache/`." +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:29 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:31 +msgid "Run docker container:" +msgstr "运行 Docker 容器:" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:58 +msgid "Setup environment variables:" +msgstr "设置环境变量:" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:69 +msgid "" +"`max_split_size_mb` prevents the native allocator from splitting blocks " +"larger than this size (in MB). This can reduce fragmentation and may " +"allow some borderline workloads to complete without running out of " +"memory. You can find more details " +"[here](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)." +msgstr "" +"`max_split_size_mb` 可防止原生分配器拆分大于此大小(以 MB 为单位)的内存块。这可以减少内存碎片,并可能使一些临界工作负载在内存耗尽前完成。您可以在[此处](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)找到更多详细信息。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:72 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:74 +msgid "Online Serving" +msgstr "在线服务" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md +msgid "Image Inputs" +msgstr "图像输入" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:83 +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:146 +msgid "" +"Run the following command inside the container to start the vLLM server " +"on multi-NPU:" +msgstr "在容器内运行以下命令以在多 NPU 上启动 vLLM 服务器:" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:95 +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:157 +msgid "" +"vllm-ascend supports Expert Parallelism (EP) via `--enable-expert-" +"parallel`, which allows experts in MoE models to be deployed on separate " +"GPUs for better throughput." +msgstr "vllm-ascend 通过 `--enable-expert-parallel` 支持专家并行(EP),这允许将 MoE 模型中的专家部署在单独的 GPU 上以获得更好的吞吐量。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:97 +msgid "" +"It's highly recommended to specify `--limit-mm-per-prompt.video 0` if " +"your inference server will only process image inputs since enabling video" +" inputs consumes more memory reserved for long video embeddings." +msgstr "如果您的推理服务器仅处理图像输入,强烈建议指定 `--limit-mm-per-prompt.video 0`,因为启用视频输入会消耗更多为长视频嵌入保留的内存。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:99 +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:159 +msgid "" +"You can set `--max-model-len` to preserve memory. By default the model's " +"context length is 262K, but `--max-model-len 128000` is good for most " +"scenarios." 
+msgstr "您可以设置 `--max-model-len` 以节省内存。默认情况下,模型的上下文长度为 262K,但 `--max-model-len 128000` 适用于大多数场景。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:102 +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:164 +msgid "If your service start successfully, you can see the info shown below:" +msgstr "如果您的服务启动成功,您可以看到如下所示的信息:" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:110 +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:172 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:128 +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:190 +msgid "" +"If you query the server successfully, you can see the info shown below " +"(client):" +msgstr "如果您成功查询服务器,您可以看到如下所示的信息(客户端):" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:134 +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:196 +msgid "Logs of the vllm server:" +msgstr "vllm 服务器的日志:" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md +msgid "Video Inputs" +msgstr "视频输入" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:161 +msgid "" +"Set `--allowed-local-media-path /media` to use your local video that " +"located at `/media`, since directly download the video during serving can" +" be extremely slow due to network issues." +msgstr "设置 `--allowed-local-media-path /media` 以使用位于 `/media` 的本地视频,因为在服务期间直接下载视频可能因网络问题而极其缓慢。" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:205 +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md:207 +msgid "" +"The usage of offline inference with `Qwen3-VL-30B-A3B-Instruct` is " +"totally the same as that of `Qwen3-VL-8B-Instruct`, find more details at " +"[Qwen3-VL-8B-" +"Instruct](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/models" +"/Qwen-VL-Dense.html#offline-inference)." +msgstr "`Qwen3-VL-30B-A3B-Instruct` 的离线推理使用方法与 `Qwen3-VL-8B-Instruct` 完全相同,更多详细信息请参阅 [Qwen3-VL-8B-Instruct](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/models/Qwen-VL-Dense.html#offline-inference)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Embedding.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Embedding.po new file mode 100644 index 00000000..a7e110fe --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Embedding.po @@ -0,0 +1,172 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:1 +msgid "Qwen3-VL-Embedding" +msgstr "Qwen3-VL-Embedding" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:5 +msgid "" +"The Qwen3-VL-Embedding and Qwen3-VL-Reranker model series are the latest " +"additions to the Qwen family, built upon the recently open-sourced and " +"powerful Qwen3-VL foundation model. Specifically designed for multimodal " +"information retrieval and cross-modal understanding, this suite accepts " +"diverse inputs including text, images, screenshots, and videos, as well " +"as inputs containing a mixture of these modalities. This guide describes " +"how to run the model with vLLM Ascend." +msgstr "" +"Qwen3-VL-Embedding 和 Qwen3-VL-Reranker 模型系列是 Qwen 家族的最新成员,基于最近开源且强大的 Qwen3-VL 基础模型构建。该系列专为多模态信息检索和跨模态理解而设计,可接受包括文本、图像、截图和视频在内的多样化输入,以及包含这些模态混合的输入。本指南描述了如何使用 vLLM Ascend 运行该模型。" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:7 +msgid "Supported Features" +msgstr "支持特性" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:9 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持特性](../../user_guide/support_matrix/supported_models.md)以获取模型的支持特性矩阵。" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:11 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:13 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:15 +msgid "" +"`Qwen3-VL-Embedding-8B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-VL-Embedding-8B)" +msgstr "" +"`Qwen3-VL-Embedding-8B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-VL-Embedding-8B)" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:16 +msgid "" +"`Qwen3-VL-Embedding-2B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-VL-Embedding-2B)" +msgstr "" +"`Qwen3-VL-Embedding-2B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-VL-Embedding-2B)" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:18 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:20 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:22 +msgid "" +"You can use our official docker image to run `Qwen3-VL-Embedding` series " +"models." +msgstr "您可以使用我们的官方 docker 镜像来运行 `Qwen3-VL-Embedding` 系列模型。" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:24 +msgid "" +"Start the docker image on your node, refer to [using " +"docker](../../installation.md#set-up-using-docker)." 
+msgstr "在您的节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:26 +msgid "" +"If you don't want to use the docker image as above, you can also build " +"all from source:" +msgstr "如果您不想使用上述 docker 镜像,也可以从源码构建所有内容:" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:28 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源码安装 `vllm-ascend`,请参考[安装指南](../../installation.md)。" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:30 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:32 +msgid "" +"Using the Qwen3-VL-Embedding-8B model as an example, first run the docker" +" container with the following command:" +msgstr "以 Qwen3-VL-Embedding-8B 模型为例,首先使用以下命令运行 docker 容器:" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:34 +msgid "Online Inference" +msgstr "在线推理" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:40 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "服务器启动后,您可以使用输入提示词查询模型。" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:51 +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:86 +msgid "If you run this script successfully, you can see the info shown below:" +msgstr "如果成功运行此脚本,您将看到如下所示的信息:" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:96 +msgid "For more examples, refer to the vLLM official examples:" +msgstr "更多示例,请参考 vLLM 官方示例:" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:98 +msgid "" +"[Offline Vision Embedding Example](https://github.com/vllm-" +"project/vllm/blob/main/examples/pooling/embed/vision_embedding_offline.py)" +msgstr "" +"[离线视觉嵌入示例](https://github.com/vllm-project/vllm/blob/main/examples/pooling/embed/vision_embedding_offline.py)" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:99 +msgid "" +"[Online Vision Embedding Example](https://github.com/vllm-" +"project/vllm/blob/main/examples/pooling/embed/vision_embedding_online.py)" +msgstr "" +"[在线视觉嵌入示例](https://github.com/vllm-project/vllm/blob/main/examples/pooling/embed/vision_embedding_online.py)" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:101 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:103 +msgid "" +"Run performance of `Qwen3-VL-Embedding-8B` as an example. Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/benchmarking/cli/) for more " +"details." +msgstr "以 `Qwen3-VL-Embedding-8B` 的运行性能为例。更多详情请参考 [vllm 基准测试](https://docs.vllm.ai/en/latest/benchmarking/cli/)。" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:106 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3-VL-Embedding.md:112 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result. With this tutorial, the performance result is:" +msgstr "大约几分钟后,您将获得性能评估结果。在本教程中,性能结果如下:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Reranker.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Reranker.po new file mode 100644 index 00000000..05aa1def --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Reranker.po @@ -0,0 +1,190 @@ +# SOME DESCRIPTIVE TITLE. 
+# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:1 +msgid "Qwen3-VL-Reranker" +msgstr "Qwen3-VL-Reranker" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:5 +msgid "" +"The Qwen3-VL-Embedding and Qwen3-VL-Reranker model series are the latest " +"additions to the Qwen family, built upon the recently open-sourced and " +"powerful Qwen3-VL foundation model. Specifically designed for multimodal " +"information retrieval and cross-modal understanding, this suite accepts " +"diverse inputs including text, images, screenshots, and videos, as well " +"as inputs containing a mixture of these modalities. This guide describes " +"how to run the model with vLLM Ascend." +msgstr "" +"Qwen3-VL-Embedding 和 Qwen3-VL-Reranker 模型系列是 Qwen 家族的最新成员,基于最近开源且功能强大的 Qwen3-VL 基础模型构建。该系列专为多模态信息检索和跨模态理解而设计,可接受包括文本、图像、截图和视频在内的多样化输入,以及包含这些模态混合的输入。本指南描述了如何使用 vLLM Ascend 运行该模型。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:7 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:9 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取该模型的支持功能矩阵。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:11 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:13 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:15 +msgid "" +"`Qwen3-VL-Reranker-8B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-VL-Reranker-8B)" +msgstr "`Qwen3-VL-Reranker-8B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-VL-Reranker-8B)" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:16 +msgid "" +"`Qwen3-VL-Reranker-2B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-VL-Reranker-2B)" +msgstr "`Qwen3-VL-Reranker-2B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-VL-Reranker-2B)" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:18 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:20 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:22 +msgid "" +"You can use our official docker image to run `Qwen3-VL-Reranker` series " +"models." +msgstr "您可以使用我们的官方 docker 镜像来运行 `Qwen3-VL-Reranker` 系列模型。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:24 +msgid "" +"Start the docker image on your node, refer to [using " +"docker](../../installation.md#set-up-using-docker)." 
+msgstr "在您的节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:26 +msgid "" +"If you don't want to use the docker image as above, you can also build " +"all from source:" +msgstr "如果您不想使用上述 docker 镜像,也可以从源代码构建所有内容:" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:28 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源代码安装 `vllm-ascend`,请参考[安装指南](../../installation.md)。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:30 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:32 +msgid "Using the Qwen3-VL-Reranker-8B model as an example:" +msgstr "以 Qwen3-VL-Reranker-8B 模型为例:" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:34 +msgid "Chat Template" +msgstr "聊天模板" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:36 +msgid "" +"The Qwen3-VL-Reranker model requires a specific chat template for proper " +"formatting. Create a file named `qwen3_vl_reranker.jinja` with the " +"following content:" +msgstr "Qwen3-VL-Reranker 模型需要一个特定的聊天模板以进行正确格式化。创建一个名为 `qwen3_vl_reranker.jinja` 的文件,内容如下:" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:64 +msgid "" +"Save this file to a location of your choice (e.g., " +"`./qwen3_vl_reranker.jinja`)." +msgstr "将此文件保存到您选择的位置(例如,`./qwen3_vl_reranker.jinja`)。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:66 +msgid "Online Inference" +msgstr "在线推理" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:68 +msgid "Start the server with the following command:" +msgstr "使用以下命令启动服务器:" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:78 +msgid "Once your server is started, you can send request with follow examples." +msgstr "一旦您的服务器启动,您就可以按照以下示例发送请求。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:118 +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:203 +msgid "" +"If you run this script successfully, you will see a list of scores " +"printed to the console, similar to this:" +msgstr "如果您成功运行此脚本,您将在控制台看到打印出的分数列表,类似于这样:" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:124 +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:212 +msgid "For more examples, refer to the vLLM official examples:" +msgstr "更多示例,请参考 vLLM 官方示例:" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:214 +msgid "" +"[Offline Vision Embedding Example](https://github.com/vllm-" +"project/vllm/blob/main/examples/pooling/score/vision_reranker_offline.py)" +msgstr "[离线视觉重排示例](https://github.com/vllm-project/vllm/blob/main/examples/pooling/score/vision_reranker_offline.py)" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:215 +msgid "" +"[Online Vision Embedding Example](https://github.com/vllm-" +"project/vllm/blob/main/examples/pooling/score/vision_rerank_api_online.py)" +msgstr "[在线视觉重排示例](https://github.com/vllm-project/vllm/blob/main/examples/pooling/score/vision_rerank_api_online.py)" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:217 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:219 +msgid "" +"Run performance of `Qwen3-VL-Reranker-8B` as an example. Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/benchmarking/cli/) for more " +"details." 
+msgstr "以 `Qwen3-VL-Reranker-8B` 的运行性能为例。更多详情请参考 [vllm 基准测试](https://docs.vllm.ai/en/latest/benchmarking/cli/)。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:222 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3-VL-Reranker.md:228 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result. With this tutorial, the performance result is:" +msgstr "大约几分钟后,您将获得性能评估结果。在本教程中,性能结果如下:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-27B.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-27B.po new file mode 100644 index 00000000..d2072a46 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-27B.po @@ -0,0 +1,402 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:1 +msgid "Qwen3.5-27B" +msgstr "Qwen3.5-27B" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:5 +msgid "" +"Qwen3.5 represents a significant leap forward, integrating breakthroughs " +"in multimodal learning, architectural efficiency, reinforcement learning " +"scale, and global accessibility to empower developers and enterprises " +"with unprecedented capability and efficiency." +msgstr "Qwen3.5 代表了一次重大飞跃,它整合了多模态学习、架构效率、强化学习规模和全球可访问性方面的突破,为开发者和企业提供了前所未有的能力和效率。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node and multi-node deployment, accuracy and " +"performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单节点和多节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:9 +msgid "The `Qwen3.5-27B` model is first supported in `vllm-ascend:v0.17.0rc1`." +msgstr "`Qwen3.5-27B` 模型首次在 `vllm-ascend:v0.17.0rc1` 版本中得到支持。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:11 +msgid "Supported Features" +msgstr "支持的特性" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的特性](../../user_guide/support_matrix/supported_models.md)以获取模型支持的特性矩阵。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." 
+msgstr "请参考[特性指南](../../user_guide/feature_guide/index.md)以获取特性的配置信息。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:21 +msgid "" +"`Qwen3.5-27B`(BF16 version): requires 1 Atlas 800 A3 (64G × 16) node or 1" +" Atlas 800 A2 (64G × 8) node. [Download model " +"weight](https://modelscope.cn/models/Qwen/Qwen3.5-27B)" +msgstr "`Qwen3.5-27B` (BF16 版本):需要 1 个 Atlas 800 A3 (64G × 16) 节点或 1 个 Atlas 800 A2 (64G × 8) 节点。[下载模型权重](https://modelscope.cn/models/Qwen/Qwen3.5-27B)" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:22 +msgid "" +"`Qwen3.5-27B-w8a8`(Quantized version): requires 1 Atlas 800 A3 (64G × 16)" +" node or 1 Atlas 800 A2 (64G × 8) node. [Download model " +"weight](https://www.modelscope.cn/models/Eco-Tech/Qwen3.5-27B-w8a8-mtp)" +msgstr "`Qwen3.5-27B-w8a8` (量化版本):需要 1 个 Atlas 800 A3 (64G × 16) 节点或 1 个 Atlas 800 A2 (64G × 8) 节点。[下载模型权重](https://www.modelscope.cn/models/Eco-Tech/Qwen3.5-27B-w8a8-mtp)" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:24 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`." +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:26 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:28 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:30 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3.5-27B.md +msgid "Use docker image" +msgstr "使用 Docker 镜像" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:36 +msgid "" +"For example, using images `quay.io/ascend/vllm-ascend:v0.17.0rc1`(for " +"Atlas 800 A2) and `quay.io/ascend/vllm-ascend:v0.17.0rc1-a3`(for Atlas " +"800 A3)." +msgstr "例如,使用镜像 `quay.io/ascend/vllm-ascend:v0.17.0rc1`(适用于 Atlas 800 A2)和 `quay.io/ascend/vllm-ascend:v0.17.0rc1-a3`(适用于 Atlas 800 A3)。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:38 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像,并在您的节点上启动 Docker 镜像,请参考[使用 Docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md +msgid "Build from source" +msgstr "从源码构建" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:78 +msgid "You can build all from source." +msgstr "您可以从源码构建所有组件。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:80 +msgid "" +"Install `vllm-ascend`, refer to [set up using " +"python](../../installation.md#set-up-using-python)." +msgstr "安装 `vllm-ascend`,请参考[使用 Python 设置](../../installation.md#set-up-using-python)。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:84 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." 
+msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:86 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:88 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:90 +msgid "" +"`Qwen3.5-27B` and `Qwen3.5-27B-w8a8` can both be deployed on 1 Atlas 800 " +"A3(64G × 16), 1 Atlas 800 A2(64G × 8). Quantized version needs to start " +"with parameter --quantization ascend." +msgstr "`Qwen3.5-27B` 和 `Qwen3.5-27B-w8a8` 都可以部署在 1 个 Atlas 800 A3 (64G × 16) 或 1 个 Atlas 800 A2 (64G × 8) 节点上。量化版本需要使用参数 `--quantization ascend` 启动。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:92 +msgid "Run the following script to execute online 128k inference." +msgstr "运行以下脚本来执行在线 128k 推理。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:125 +msgid "**Notice:**" +msgstr "**注意:**" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:127 +msgid "The parameters are explained as follows:" +msgstr "参数解释如下:" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:129 +msgid "" +"`--data-parallel-size` 1 and `--tensor-parallel-size` 2 are common " +"settings for data parallelism (DP) and tensor parallelism (TP) sizes." +msgstr "`--data-parallel-size` 1 和 `--tensor-parallel-size` 2 是数据并行 (DP) 和张量并行 (TP) 大小的常见设置。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:130 +msgid "" +"`--max-model-len` represents the context length, which is the maximum " +"value of the input plus output for a single request." +msgstr "`--max-model-len` 表示上下文长度,即单个请求的输入加输出的最大值。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:131 +msgid "" +"`--max-num-seqs` indicates the maximum number of requests that each DP " +"group is allowed to process. If the number of requests sent to the " +"service exceeds this limit, the excess requests will remain in a waiting " +"state and will not be scheduled. Note that the time spent in the waiting " +"state is also counted in metrics such as TTFT and TPOT. Therefore, when " +"testing performance, it is generally recommended that `--max-num-seqs` * " +"`--data-parallel-size` >= the actual total concurrency." +msgstr "`--max-num-seqs` 表示每个 DP 组允许处理的最大请求数。如果发送到服务的请求数超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:132 +msgid "" +"`--max-num-batched-tokens` represents the maximum number of tokens that " +"the model can process in a single step. Currently, vLLM v1 scheduling " +"enables ChunkPrefill/SplitFuse by default, which means:" +msgstr "`--max-num-batched-tokens` 表示模型在单步中可以处理的最大 token 数。目前,vLLM v1 调度默认启用 ChunkPrefill/SplitFuse,这意味着:" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:133 +msgid "" +"(1) If the input length of a request is greater than `--max-num-batched-" +"tokens`, it will be divided into multiple rounds of computation according" +" to `--max-num-batched-tokens`;" +msgstr "(1) 如果一个请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:134 +msgid "" +"(2) Decode requests are prioritized for scheduling, and prefill requests " +"are scheduled only if there is available capacity." +msgstr "(2) 解码请求优先调度,只有在有可用容量时才会调度预填充请求。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:135 +msgid "" +"Generally, if `--max-num-batched-tokens` is set to a larger value, the " +"overall latency will be lower, but the pressure on GPU memory (activation" +" value usage) will be greater." 
+msgstr "通常,如果将 `--max-num-batched-tokens` 设置为较大的值,整体延迟会更低,但 GPU 内存(激活值使用)的压力会更大。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:136 +msgid "" +"`--gpu-memory-utilization` represents the proportion of HBM that vLLM " +"will use for actual inference. Its essential function is to calculate the" +" available kv_cache size. During the warm-up phase (referred to as " +"profile run in vLLM), vLLM records the peak GPU memory usage during an " +"inference process with an input size of `--max-num-batched-tokens`. The " +"available kv_cache size is then calculated as: `--gpu-memory-utilization`" +" * HBM size - peak GPU memory usage. Therefore, the larger the value of " +"`--gpu-memory-utilization`, the more kv_cache can be used. However, since" +" the GPU memory usage during the warm-up phase may differ from that " +"during actual inference (e.g., due to uneven EP load), setting `--gpu-" +"memory-utilization` too high may lead to OOM (Out of Memory) issues " +"during actual inference. The default value is `0.9`." +msgstr "`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache 大小。在预热阶段(在 vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可以使用的 kv_cache 就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理期间不同(例如,由于 EP 负载不均衡),将 `--gpu-memory-utilization` 设置得过高可能会导致实际推理期间出现 OOM(内存不足)问题。默认值为 `0.9`。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:137 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. " +"To enable it, for mamba-like models Qwen3.5, set `--enable-prefix-" +"caching` and `--mamba-cache-mode align`. Notice the current " +"implementation of hybrid kv cache might result in a very large block_size" +" when scheduling. For example, the block_size may be adjusted to 2048, " +"which means that any prefix shorter than 2048 will never be cached." +msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,对于类似 Mamba 的模型 Qwen3.5,请设置 `--enable-prefix-caching` 和 `--mamba-cache-mode align`。请注意,当前混合 kv cache 的实现可能在调度时导致非常大的 block_size。例如,block_size 可能被调整为 2048,这意味着任何短于 2048 的前缀将永远不会被缓存。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:138 +msgid "" +"`--quantization` \"ascend\" indicates that quantization is used. To " +"disable quantization, remove this option." +msgstr "`--quantization` \"ascend\" 表示使用量化。要禁用量化,请移除此选项。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:139 +msgid "" +"`--compilation-config` contains configurations related to the aclgraph " +"graph mode. The most significant configurations are \"cudagraph_mode\" " +"and \"cudagraph_capture_sizes\", which have the following meanings: " +"\"cudagraph_mode\": represents the specific graph mode. Currently, " +"\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " +"mainly used to reduce the cost of operator dispatch. Currently, " +"\"FULL_DECODE_ONLY\" is recommended." +msgstr "`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:141 +msgid "" +"\"cudagraph_capture_sizes\": represents different levels of graph modes. " +"The default value is [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]. 
" +"In the graph mode, the input for graphs at different levels is fixed, and" +" inputs between levels are automatically padded to the next level. " +"Currently, the default setting is recommended. Only in some scenarios is " +"it necessary to set this separately to achieve optimal performance." +msgstr "\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一级别。目前推荐使用默认设置。只有在某些场景下,才需要单独设置此参数以达到最佳性能。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:143 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:145 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "一旦您的服务器启动,您就可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:158 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:160 +msgid "Here are two accuracy evaluation methods." +msgstr "以下是两种精度评估方法。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:162 +#: ../../source/tutorials/models/Qwen3.5-27B.md:174 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:164 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:166 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3.5" +"-27B-w8a8` in `vllm-ascend:v0.17.0rc1` for reference only." +msgstr "执行后,您可以获得结果,以下是 `Qwen3.5-27B-w8a8` 在 `vllm-ascend:v0.17.0rc1` 中的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "gsm8k" +msgstr "gsm8k" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:76 +msgid "96.74" +msgstr "96.74" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:172 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:176 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参阅[使用AISBench进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:178 +msgid "Using vLLM Benchmark" +msgstr "使用vLLM基准测试" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:180 +msgid "Run performance evaluation of `Qwen3.5-27B-w8a8` as an example." +msgstr "以运行 `Qwen3.5-27B-w8a8` 的性能评估为例。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:182 +msgid "" +"Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/contributing/benchmarks.html) " +"for more details." 
+msgstr "更多详情请参阅[vllm基准测试](https://docs.vllm.ai/en/latest/contributing/benchmarks.html)。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:184 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 包含三个子命令:" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:186 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批次请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:187 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:188 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:190 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例,运行以下代码。" + +#: ../../source/tutorials/models/Qwen3.5-27B.md:197 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您将获得性能评估结果。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po new file mode 100644 index 00000000..ba36bd91 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po @@ -0,0 +1,542 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:1 +msgid "Qwen3.5-397B-A17B" +msgstr "Qwen3.5-397B-A17B" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:5 +msgid "" +"Qwen3.5 represents a significant leap forward, integrating breakthroughs " +"in multimodal learning, architectural efficiency, reinforcement learning " +"scale, and global accessibility to empower developers and enterprises " +"with unprecedented capability and efficiency." +msgstr "Qwen3.5 代表了一次重大飞跃,它整合了多模态学习、架构效率、强化学习规模和全球可访问性方面的突破,为开发者和企业提供了前所未有的能力和效率。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:7 +msgid "" +"This document will show the main verification steps of the model, " +"including supported features, feature configuration, environment " +"preparation, single-node and multi-node deployment, accuracy and " +"performance evaluation." +msgstr "本文档将展示该模型的主要验证步骤,包括支持的功能、功能配置、环境准备、单节点和多节点部署、精度和性能评估。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:9 +msgid "" +"The `Qwen3.5-397B-A17B` model is first supported in `vllm-" +"ascend:v0.17.0rc1`." +msgstr "`Qwen3.5-397B-A17B` 模型首次在 `vllm-ascend:v0.17.0rc1` 版本中得到支持。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:11 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:13 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." 
+msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:15 +msgid "" +"Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " +"the feature's configuration." +msgstr "请参考[功能指南](../../user_guide/feature_guide/index.md)以获取功能的配置信息。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:17 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:19 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:21 +msgid "" +"`Qwen3.5-397B-A17B`(BF16 version): require 2 Atlas 800 A3 (64G × 16) " +"nodes or 4 Atlas 800 A2 (64G × 8) nodes. [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3.5-397B-A17B)" +msgstr "`Qwen3.5-397B-A17B` (BF16 版本):需要 2 个 Atlas 800 A3 (64G × 16) 节点或 4 个 Atlas 800 A2 (64G × 8) 节点。[下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3.5-397B-A17B)" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:22 +msgid "" +"`Qwen3.5-397B-A17B-w8a8`(Quantized version): require 1 Atlas 800 A3 (64G " +"× 16) node or 2 Atlas 800 A2 (64G × 8) nodes. [Download model " +"weight](https://www.modelscope.cn/models/Eco-Tech/Qwen3.5-397B-A17B-" +"w8a8-mtp)" +msgstr "`Qwen3.5-397B-A17B-w8a8` (量化版本):需要 1 个 Atlas 800 A3 (64G × 16) 节点或 2 个 Atlas 800 A2 (64G × 8) 节点。[下载模型权重](https://www.modelscope.cn/models/Eco-Tech/Qwen3.5-397B-A17B-w8a8-mtp)" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:24 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`." +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:26 +msgid "Verify Multi-node Communication(Optional)" +msgstr "验证多节点通信(可选)" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:28 +msgid "" +"If you want to deploy multi-node environment, you need to verify multi-" +"node communication according to [verify multi-node communication " +"environment](../../installation.md#verify-multi-node-communication)." +msgstr "如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:30 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md +msgid "Use docker image" +msgstr "使用 Docker 镜像" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:36 +msgid "" +"For example, using images `quay.io/ascend/vllm-ascend:v0.17.0rc1`(for " +"Atlas 800 A2) and `quay.io/ascend/vllm-ascend:v0.17.0rc1-a3`(for Atlas " +"800 A3)." +msgstr "例如,使用镜像 `quay.io/ascend/vllm-ascend:v0.17.0rc1`(适用于 Atlas 800 A2)和 `quay.io/ascend/vllm-ascend:v0.17.0rc1-a3`(适用于 Atlas 800 A3)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:38 +msgid "" +"Select an image based on your machine type and start the docker image on " +"your node, refer to [using docker](../../installation.md#set-up-using-" +"docker)." +msgstr "根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考[使用 Docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md +msgid "Build from source" +msgstr "从源码构建" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:78 +msgid "You can build all from source." +msgstr "您可以从源码构建所有组件。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:80 +msgid "" +"Install `vllm-ascend`, refer to [set up using " +"python](../../installation.md#set-up-using-python)." 
+msgstr "安装 `vllm-ascend`,请参考[使用 Python 设置](../../installation.md#set-up-using-python)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:84 +msgid "" +"If you want to deploy multi-node environment, you need to set up " +"environment on each node." +msgstr "如果您想部署多节点环境,需要在每个节点上设置环境。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:86 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:88 +msgid "Single-node Deployment" +msgstr "单节点部署" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:90 +msgid "" +"`Qwen3.5-397B-A17B` can be deployed on 2 Atlas 800 A3(64G*16) or 4 Atlas " +"800 A2(64G*8). `Qwen3.5-397B-A17B-w8a8` can be deployed on 1 Atlas 800 " +"A3(64G*16) or 2 Atlas 800 A2(64G*8), need to start with parameter " +"`--quantization ascend`." +msgstr "`Qwen3.5-397B-A17B` 可以部署在 2 个 Atlas 800 A3(64G*16) 或 4 个 Atlas 800 A2(64G*8) 上。`Qwen3.5-397B-A17B-w8a8` 可以部署在 1 个 Atlas 800 A3(64G*16) 或 2 个 Atlas 800 A2(64G*8) 上,需要使用参数 `--quantization ascend` 启动。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:93 +msgid "" +"Run the following script to execute online 128k inference On 1 Atlas 800 " +"A3(64G*16)." +msgstr "在 1 个 Atlas 800 A3(64G*16) 上运行以下脚本以执行在线 128k 推理。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:134 +msgid "**Notice:**" +msgstr "**注意:**" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:136 +msgid "The parameters are explained as follows:" +msgstr "参数解释如下:" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:138 +msgid "" +"`--data-parallel-size` 1 and `--tensor-parallel-size` 16 are common " +"settings for data parallelism (DP) and tensor parallelism (TP) sizes." +msgstr "`--data-parallel-size` 1 和 `--tensor-parallel-size` 16 是数据并行 (DP) 和张量并行 (TP) 大小的常见设置。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:139 +msgid "" +"`--max-model-len` represents the context length, which is the maximum " +"value of the input plus output for a single request." +msgstr "`--max-model-len` 表示上下文长度,即单个请求的输入加输出的最大值。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:140 +msgid "" +"`--max-num-seqs` indicates the maximum number of requests that each DP " +"group is allowed to process. If the number of requests sent to the " +"service exceeds this limit, the excess requests will remain in a waiting " +"state and will not be scheduled. Note that the time spent in the waiting " +"state is also counted in metrics such as TTFT and TPOT. Therefore, when " +"testing performance, it is generally recommended that `--max-num-seqs` * " +"`--data-parallel-size` >= the actual total concurrency." +msgstr "`--max-num-seqs` 表示每个 DP 组允许处理的最大请求数。如果发送到服务的请求数超过此限制,多余的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:141 +msgid "" +"`--max-num-batched-tokens` represents the maximum number of tokens that " +"the model can process in a single step. 
Currently, vLLM v1 scheduling " +"enables ChunkPrefill/SplitFuse by default, which means:" +msgstr "`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 ChunkPrefill/SplitFuse,这意味着:" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:142 +msgid "" +"(1) If the input length of a request is greater than `--max-num-batched-" +"tokens`, it will be divided into multiple rounds of computation according" +" to `--max-num-batched-tokens`;" +msgstr "(1) 如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:143 +msgid "" +"(2) Decode requests are prioritized for scheduling, and prefill requests " +"are scheduled only if there is available capacity." +msgstr "(2) 解码请求优先调度,只有在有可用容量时才调度预填充请求。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:144 +msgid "" +"Generally, if `--max-num-batched-tokens` is set to a larger value, the " +"overall latency will be lower, but the pressure on GPU memory (activation" +" value usage) will be greater." +msgstr "通常,如果 `--max-num-batched-tokens` 设置得较大,整体延迟会更低,但 GPU 内存(激活值使用)的压力会更大。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:145 +msgid "" +"`--gpu-memory-utilization` represents the proportion of HBM that vLLM " +"will use for actual inference. Its essential function is to calculate the" +" available kv_cache size. During the warm-up phase (referred to as " +"profile run in vLLM), vLLM records the peak GPU memory usage during an " +"inference process with an input size of `--max-num-batched-tokens`. The " +"available kv_cache size is then calculated as: `--gpu-memory-utilization`" +" * HBM size - peak GPU memory usage. Therefore, the larger the value of " +"`--gpu-memory-utilization`, the more kv_cache can be used. However, since" +" the GPU memory usage during the warm-up phase may differ from that " +"during actual inference (e.g., due to uneven EP load), setting `--gpu-" +"memory-utilization` too high may lead to OOM (Out of Memory) issues " +"during actual inference. The default value is `0.9`." +msgstr "`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache 大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache 就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理时不同(例如,由于 EP 负载不均),将 `--gpu-memory-utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:146 +msgid "" +"`--enable-expert-parallel` indicates that EP is enabled. Note that vLLM " +"does not support a mixed approach of ETP and EP; that is, MoE can either " +"use pure EP or pure TP." +msgstr "`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE 要么使用纯 EP,要么使用纯 TP。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:147 +msgid "" +"`--no-enable-prefix-caching` indicates that prefix caching is disabled. " +"To enable it, for mamba-like models Qwen3.5, set `--enable-prefix-" +"caching` and `--mamba-cache-mode align`. Notice the current " +"implementation of hybrid kv cache might result in a very large block_size" +" when scheduling. For example, the block_size may be adjusted to 2048, " +"which means that any prefix shorter than 2048 will never be cached." 
+msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,对于类似 Mamba 的模型 Qwen3.5,请设置 `--enable-prefix-caching` 和 `--mamba-cache-mode align`。请注意,当前混合 kv cache 的实现可能在调度时导致非常大的 block_size。例如,block_size 可能被调整为 2048,这意味着任何短于 2048 的前缀将永远不会被缓存。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:148 +msgid "" +"`--quantization` \"ascend\" indicates that quantization is used. To " +"disable quantization, remove this option." +msgstr "`--quantization` \"ascend\" 表示使用了量化。要禁用量化,请移除此选项。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:149 +msgid "" +"`--compilation-config` contains configurations related to the aclgraph " +"graph mode. The most significant configurations are \"cudagraph_mode\" " +"and \"cudagraph_capture_sizes\", which have the following meanings: " +"\"cudagraph_mode\": represents the specific graph mode. Currently, " +"\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " +"mainly used to reduce the cost of operator dispatch. Currently, " +"\"FULL_DECODE_ONLY\" is recommended." +msgstr "`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:151 +msgid "" +"\"cudagraph_capture_sizes\": represents different levels of graph modes. " +"The default value is [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]. " +"In the graph mode, the input for graphs at different levels is fixed, and" +" inputs between levels are automatically padded to the next level. " +"Currently, the default setting is recommended. Only in some scenarios is " +"it necessary to set this separately to achieve optimal performance." +msgstr "\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一个级别。目前推荐使用默认设置。只有在某些场景下,才需要单独设置此参数以达到最佳性能。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:153 +msgid "Multi-node Deployment with MP (Recommended)" +msgstr "使用 MP 的多节点部署(推荐)" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:155 +msgid "" +"Assume you have 2 Atlas 800 A2 nodes, and want to deploy the `Qwen3.5" +"-397B-A17B` model across multiple nodes." +msgstr "假设您有 2 个 Atlas 800 A2 节点,并希望跨多个节点部署 `Qwen3.5-397B-A17B` 模型。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:157 +msgid "Node 0" +msgstr "节点 0" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:203 +msgid "Node1" +msgstr "节点 1" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:253 +msgid "" +"If the service starts successfully, the following information will be " +"displayed on node 0:" +msgstr "如果服务启动成功,节点 0 上将显示以下信息:" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:264 +msgid "Multi-node Deployment with Ray" +msgstr "使用 Ray 的多节点部署" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:266 +msgid "refer to [Ray Distributed (Qwen/Qwen3-235B-A22B)](../features/ray.md)." +msgstr "请参考 [Ray 分布式 (Qwen/Qwen3-235B-A22B)](../features/ray.md)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:268 +msgid "Prefill-Decode Disaggregation" +msgstr "预填充-解码解耦" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:270 +msgid "" +"We recommend using Mooncake for deployment: " +"[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)." 
+msgstr "我们推荐使用 Mooncake 进行部署:[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:272 +msgid "" +"Take Atlas 800 A3 (64G × 16) for example, we recommend to deploy 1P1D (3 " +"nodes) to run Qwen3.5-397B-A17B." +msgstr "以 Atlas 800 A3 (64G × 16) 为例,我们建议部署 1P1D(3 个节点)来运行 Qwen3.5-397B-A17B。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:274 +msgid "`Qwen3.5-397B-A17B-w8a8-mtp 1P1D` require 3 Atlas 800 A3 (64G × 16)." +msgstr "`Qwen3.5-397B-A17B-w8a8-mtp 1P1D` 需要 3 个 Atlas 800 A3 (64G × 16)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:276 +msgid "" +"To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need " +"to deploy `run_p.sh` 、`run_d0.sh` and `run_d1.sh` script on each node and" +" deploy a `proxy.sh` script on prefill master node to forward requests." +msgstr "要运行 vllm-ascend `Prefill-Decode Disaggregation` 服务,您需要在每个节点上部署 `run_p.sh`、`run_d0.sh` 和 `run_d1.sh` 脚本,并在预填充主节点上部署一个 `proxy.sh` 脚本来转发请求。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:278 +msgid "Prefill Node 0 `run_p.sh` script" +msgstr "预填充节点 0 `run_p.sh` 脚本" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:353 +msgid "Decode Node 0 `run_d0.sh` script" +msgstr "解码节点 0 `run_d0.sh` 脚本" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:433 +msgid "Decode Node 1 `run_d1.sh` script" +msgstr "解码节点 1 `run_d1.sh` 脚本" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:512 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数说明如下:" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:515 +msgid "" +"`--async-scheduling`: enables the asynchronous scheduling function. When " +"Multi-Token Prediction (MTP) is enabled, asynchronous scheduling of " +"operator delivery can be implemented to overlap the operator delivery " +"latency." +msgstr "" +"`--async-scheduling`:启用异步调度功能。当启用多令牌预测(MTP)时,可以实现算子交付的异步调度,以重叠算子交付延迟。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:516 +msgid "" +"`cudagraph_capture_sizes`: The recommended value is `n x (mtp + 1)`. And " +"the min is `n = 1` and the max is `n = max-num-seqs`. For other values, " +"it is recommended to set them to the number of frequently occurring " +"requests on the Decode (D) node." +msgstr "" +"`cudagraph_capture_sizes`:推荐值为 `n x (mtp + 1)`。最小值为 `n = 1`,最大值为 `n = max-num-seqs`。对于其他值,建议设置为解码(D)节点上频繁出现的请求数量。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:517 +msgid "" +"`recompute_scheduler_enable: true`: enables the recomputation scheduler. " +"When the Key-Value Cache (KV Cache) of the decode node is insufficient, " +"requests will be sent to the prefill node to recompute the KV Cache. In " +"the PD separation scenario, it is recommended to enable this " +"configuration on both prefill and decode nodes simultaneously." +msgstr "" +"`recompute_scheduler_enable: true`:启用重计算调度器。当解码节点的键值缓存(KV Cache)不足时,请求将被发送到预填充节点以重新计算 KV Cache。在 PD 分离场景下,建议同时在预填充节点和解码节点上启用此配置。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:518 +msgid "" +"`no-enable-prefix-caching`: The prefix-cache feature is enabled by " +"default. You can use the `--no-enable-prefix-caching` parameter to " +"disable this feature. 
Notice: for Prefill-Decode disaggregation feature, " +"known issue on D node: [#7944](https://github.com/vllm-project/vllm-" +"ascend/issues/7944)" +msgstr "" +"`no-enable-prefix-caching`:前缀缓存功能默认启用。您可以使用 `--no-enable-prefix-caching` 参数禁用此功能。注意:对于预填充-解码分离功能,D 节点上的已知问题:[#7944](https://github.com/vllm-project/vllm-ascend/issues/7944)" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:520 +msgid "Run the `proxy.sh` script on the prefill master node" +msgstr "在预填充主节点上运行 `proxy.sh` 脚本" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:522 +msgid "" +"Run a proxy server on the same node with the prefiller service instance. " +"You can get the proxy program in the repository's examples: " +"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "" +"在与预填充服务实例相同的节点上运行一个代理服务器。您可以在仓库的示例中找到代理程序:[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:548 +msgid "Functional Verification" +msgstr "功能验证" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:550 +msgid "Once your server is started, you can query the model with input prompts:" +msgstr "服务器启动后,您可以使用输入提示词查询模型:" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:563 +msgid "Accuracy Evaluation" +msgstr "精度评估" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:565 +msgid "Here are two accuracy evaluation methods." +msgstr "以下是两种精度评估方法。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:567 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:579 +msgid "Using AISBench" +msgstr "使用 AISBench" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:569 +msgid "" +"Refer to [Using " +"AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " +"details." +msgstr "详情请参阅[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:571 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3.5" +"-397B-A17B-w8a8` in `vllm-ascend:v0.17.0rc1` for reference only." 
+msgstr "执行后,您可以获得结果,以下是 `vllm-ascend:v0.17.0rc1` 中 `Qwen3.5-397B-A17B-w8a8` 的结果,仅供参考。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "gsm8k" +msgstr "gsm8k" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "gen" +msgstr "生成" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 +msgid "96.74" +msgstr "96.74" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:577 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:581 +msgid "" +"Refer to [Using AISBench for performance " +"evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation) for details." +msgstr "详情请参阅[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:583 +msgid "Using vLLM Benchmark" +msgstr "使用 vLLM Benchmark" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:585 +msgid "Run performance evaluation of `Qwen3.5-397B-A17B-w8a8` as an example." +msgstr "以运行 `Qwen3.5-397B-A17B-w8a8` 的性能评估为例。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:587 +msgid "" +"Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/contributing/benchmarks.html) " +"for more details." +msgstr "更多详情请参阅 [vllm benchmark](https://docs.vllm.ai/en/latest/contributing/benchmarks.html)。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:589 +msgid "There are three `vllm bench` subcommands:" +msgstr "`vllm bench` 有三个子命令:" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:591 +msgid "`latency`: Benchmark the latency of a single batch of requests." +msgstr "`latency`:对单批请求的延迟进行基准测试。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:592 +msgid "`serve`: Benchmark the online serving throughput." +msgstr "`serve`:对在线服务吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:593 +msgid "`throughput`: Benchmark offline inference throughput." +msgstr "`throughput`:对离线推理吞吐量进行基准测试。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:595 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。运行代码如下。" + +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:602 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result." +msgstr "大约几分钟后,您将获得性能评估结果。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_embedding.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_embedding.po new file mode 100644 index 00000000..6a1a723c --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_embedding.po @@ -0,0 +1,158 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3_embedding.md:1 +msgid "Qwen3-Embedding" +msgstr "Qwen3-Embedding" + +#: ../../source/tutorials/models/Qwen3_embedding.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3_embedding.md:5 +msgid "" +"The Qwen3 Embedding model series is the latest proprietary model of the " +"Qwen family, specifically designed for text embedding and ranking tasks. " +"Building upon the dense foundational models of the Qwen3 series, it " +"provides a comprehensive range of text embeddings and reranking models in" +" various sizes (0.6B, 4B, and 8B). This guide describes how to run the " +"model with vLLM Ascend. Note that only 0.9.2rc1 and higher versions of " +"vLLM Ascend support the model." +msgstr "" +"Qwen3 Embedding 模型系列是 Qwen 家族最新的专有模型,专为文本嵌入和排序任务设计。它基于 Qwen3 系列的稠密基础模型,提供了多种尺寸(0.6B、4B 和 8B)的全面文本嵌入和重排序模型。本指南描述了如何使用 vLLM Ascend 运行该模型。请注意,只有 vLLM Ascend 0.9.2rc1 及更高版本支持此模型。" + +#: ../../source/tutorials/models/Qwen3_embedding.md:7 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3_embedding.md:9 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取该模型的支持功能矩阵。" + +#: ../../source/tutorials/models/Qwen3_embedding.md:11 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3_embedding.md:13 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3_embedding.md:15 +msgid "" +"`Qwen3-Embedding-8B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-8B)" +msgstr "`Qwen3-Embedding-8B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-8B)" + +#: ../../source/tutorials/models/Qwen3_embedding.md:16 +msgid "" +"`Qwen3-Embedding-4B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-4B)" +msgstr "`Qwen3-Embedding-4B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-4B)" + +#: ../../source/tutorials/models/Qwen3_embedding.md:17 +msgid "" +"`Qwen3-Embedding-0.6B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B)" +msgstr "`Qwen3-Embedding-0.6B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B)" + +#: ../../source/tutorials/models/Qwen3_embedding.md:19 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多个节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/Qwen3_embedding.md:21 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3_embedding.md:23 +msgid "" +"You can use our official docker image to run `Qwen3-Embedding` series " +"models." +msgstr "您可以使用我们的官方 docker 镜像来运行 `Qwen3-Embedding` 系列模型。" + +#: ../../source/tutorials/models/Qwen3_embedding.md:25 +msgid "" +"Start the docker image on your node, refer to [using " +"docker](../../installation.md#set-up-using-docker)." 
+msgstr "在您的节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3_embedding.md:27 +msgid "" +"if you don't want to use the docker image as above, you can also build " +"all from source:" +msgstr "如果您不想使用上述的 docker 镜像,也可以从源代码构建所有内容:" + +#: ../../source/tutorials/models/Qwen3_embedding.md:29 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源代码安装 `vllm-ascend`,请参考[安装指南](../../installation.md)。" + +#: ../../source/tutorials/models/Qwen3_embedding.md:31 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3_embedding.md:33 +msgid "" +"Using the Qwen3-Embedding-8B model as an example, first run the docker " +"container with the following command:" +msgstr "以 Qwen3-Embedding-8B 模型为例,首先使用以下命令运行 docker 容器:" + +#: ../../source/tutorials/models/Qwen3_embedding.md:35 +msgid "Online Inference" +msgstr "在线推理" + +#: ../../source/tutorials/models/Qwen3_embedding.md:41 +msgid "Once your server is started, you can query the model with input prompts." +msgstr "一旦您的服务器启动,您就可以使用输入提示词查询模型。" + +#: ../../source/tutorials/models/Qwen3_embedding.md:52 +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/tutorials/models/Qwen3_embedding.md:87 +msgid "If you run this script successfully, you can see the info shown below:" +msgstr "如果您成功运行此脚本,您将看到如下所示的信息:" + +#: ../../source/tutorials/models/Qwen3_embedding.md:96 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3_embedding.md:98 +msgid "" +"Run performance of `Qwen3-Reranker-8B` as an example. Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/contributing/) for more " +"details." +msgstr "以 `Qwen3-Reranker-8B` 的运行性能为例。更多详情请参考 [vllm 基准测试](https://docs.vllm.ai/en/latest/contributing/)。" + +#: ../../source/tutorials/models/Qwen3_embedding.md:101 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3_embedding.md:107 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result. With this tutorial, the performance result is:" +msgstr "大约几分钟后,您将获得性能评估结果。按照本教程,性能结果如下:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_reranker.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_reranker.po new file mode 100644 index 00000000..2fdef71b --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_reranker.po @@ -0,0 +1,165 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/Qwen3_reranker.md:1 +msgid "Qwen3-Reranker" +msgstr "Qwen3-Reranker" + +#: ../../source/tutorials/models/Qwen3_reranker.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/tutorials/models/Qwen3_reranker.md:5 +msgid "" +"The Qwen3 Reranker model series is the latest proprietary model of the " +"Qwen family, specifically designed for text embedding and ranking tasks. " +"Building upon the dense foundational models of the Qwen3 series, it " +"provides a comprehensive range of text embeddings and reranking models in" +" various sizes (0.6B, 4B, and 8B). This guide describes how to run the " +"model with vLLM Ascend. Note that only 0.9.2rc1 and higher versions of " +"vLLM Ascend support the model." +msgstr "" +"Qwen3 Reranker 模型系列是 Qwen 家族最新的专有模型,专为文本嵌入和排序任务设计。它基于 Qwen3 系列的稠密基础模型,提供了多种尺寸(0.6B、4B 和 8B)的全面文本嵌入和重排序模型。本指南描述了如何使用 vLLM Ascend 运行该模型。请注意,只有 vLLM Ascend 0.9.2rc1 及更高版本支持此模型。" + +#: ../../source/tutorials/models/Qwen3_reranker.md:7 +msgid "Supported Features" +msgstr "支持的功能" + +#: ../../source/tutorials/models/Qwen3_reranker.md:9 +msgid "" +"Refer to [supported " +"features](../../user_guide/support_matrix/supported_models.md) to get the" +" model's supported feature matrix." +msgstr "请参考[支持的功能](../../user_guide/support_matrix/supported_models.md)以获取该模型支持的功能矩阵。" + +#: ../../source/tutorials/models/Qwen3_reranker.md:11 +msgid "Environment Preparation" +msgstr "环境准备" + +#: ../../source/tutorials/models/Qwen3_reranker.md:13 +msgid "Model Weight" +msgstr "模型权重" + +#: ../../source/tutorials/models/Qwen3_reranker.md:15 +msgid "" +"`Qwen3-Reranker-8B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-Reranker-8B)" +msgstr "`Qwen3-Reranker-8B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-Reranker-8B)" + +#: ../../source/tutorials/models/Qwen3_reranker.md:16 +msgid "" +"`Qwen3-Reranker-4B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-Reranker-4B)" +msgstr "`Qwen3-Reranker-4B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-Reranker-4B)" + +#: ../../source/tutorials/models/Qwen3_reranker.md:17 +msgid "" +"`Qwen3-Reranker-0.6B` [Download model " +"weight](https://www.modelscope.cn/models/Qwen/Qwen3-Reranker-0.6B)" +msgstr "`Qwen3-Reranker-0.6B` [下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3-Reranker-0.6B)" + +#: ../../source/tutorials/models/Qwen3_reranker.md:19 +msgid "" +"It is recommended to download the model weight to the shared directory of" +" multiple nodes, such as `/root/.cache/`" +msgstr "建议将模型权重下载到多节点的共享目录中,例如 `/root/.cache/`" + +#: ../../source/tutorials/models/Qwen3_reranker.md:21 +msgid "Installation" +msgstr "安装" + +#: ../../source/tutorials/models/Qwen3_reranker.md:23 +msgid "" +"You can use our official docker image to run `Qwen3-Reranker` series " +"models." +msgstr "您可以使用我们的官方 docker 镜像来运行 `Qwen3-Reranker` 系列模型。" + +#: ../../source/tutorials/models/Qwen3_reranker.md:25 +msgid "" +"Start the docker image on your node, refer to [using " +"docker](../../installation.md#set-up-using-docker)." 
+msgstr "在您的节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-up-using-docker)。" + +#: ../../source/tutorials/models/Qwen3_reranker.md:27 +msgid "" +"if you don't want to use the docker image as above, you can also build " +"all from source:" +msgstr "如果您不想使用上述 docker 镜像,也可以从源代码构建所有内容:" + +#: ../../source/tutorials/models/Qwen3_reranker.md:29 +msgid "" +"Install `vllm-ascend` from source, refer to " +"[installation](../../installation.md)." +msgstr "从源代码安装 `vllm-ascend`,请参考[安装](../../installation.md)。" + +#: ../../source/tutorials/models/Qwen3_reranker.md:31 +msgid "Deployment" +msgstr "部署" + +#: ../../source/tutorials/models/Qwen3_reranker.md:33 +msgid "" +"Using the Qwen3-Reranker-8B model as an example, first run the docker " +"container with the following command:" +msgstr "以 Qwen3-Reranker-8B 模型为例,首先使用以下命令运行 docker 容器:" + +#: ../../source/tutorials/models/Qwen3_reranker.md:35 +msgid "Online Inference" +msgstr "在线推理" + +#: ../../source/tutorials/models/Qwen3_reranker.md:41 +msgid "Once your server is started, you can send request with follow examples." +msgstr "服务器启动后,您可以按照以下示例发送请求。" + +#: ../../source/tutorials/models/Qwen3_reranker.md:43 +msgid "requests demo + formatting query & document" +msgstr "requests 演示 + 格式化查询和文档" + +#: ../../source/tutorials/models/Qwen3_reranker.md:83 +#: ../../source/tutorials/models/Qwen3_reranker.md:160 +msgid "" +"If you run this script successfully, you will see a list of scores " +"printed to the console, similar to this:" +msgstr "如果成功运行此脚本,您将在控制台看到打印出的分数列表,类似于以下内容:" + +#: ../../source/tutorials/models/Qwen3_reranker.md:89 +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/tutorials/models/Qwen3_reranker.md:166 +msgid "Performance" +msgstr "性能" + +#: ../../source/tutorials/models/Qwen3_reranker.md:168 +msgid "" +"Run performance of `Qwen3-Reranker-8B` as an example. Refer to [vllm " +"benchmark](https://docs.vllm.ai/en/latest/contributing/) for more " +"details." +msgstr "以 `Qwen3-Reranker-8B` 的运行性能为例。更多详情请参考 [vllm 基准测试](https://docs.vllm.ai/en/latest/contributing/)。" + +#: ../../source/tutorials/models/Qwen3_reranker.md:171 +msgid "Take the `serve` as an example. Run the code as follows." +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3_reranker.md:177 +msgid "" +"After about several minutes, you can get the performance evaluation " +"result. With this tutorial, the performance result is:" +msgstr "大约几分钟后,您将获得性能评估结果。在本教程中,性能结果如下:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/index.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/index.po new file mode 100644 index 00000000..36e79006 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/index.po @@ -0,0 +1,29 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/tutorials/models/index.md:1 +#: ../../source/tutorials/models/index.md:5 +msgid "Model Tutorials" +msgstr "模型教程" + +#: ../../source/tutorials/models/index.md:3 +msgid "This section provides tutorials for different models of vLLM Ascend." +msgstr "本节提供 vLLM Ascend 不同模型的使用教程。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po index b60df5ac..a39a4500 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po @@ -4,283 +4,518 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/configuration/additional_config.md:1 +#: ../../source/user_guide/configuration/additional_config.md:1 msgid "Additional Configuration" msgstr "附加配置" -#: ../../user_guide/configuration/additional_config.md:3 +#: ../../source/user_guide/configuration/additional_config.md:3 msgid "" -"additional configuration is a mechanism provided by vLLM to allow plugins to" -" control inner behavior by their own. vLLM Ascend uses this mechanism to " -"make the project more flexible." -msgstr "额外配置是 vLLM 提供的一种机制,允许插件自行控制内部行为。vLLM Ascend 利用这种机制使项目更加灵活。" +"Additional configuration is a mechanism provided by vLLM to allow plugins" +" to control internal behavior by themselves. VLLM Ascend uses this " +"mechanism to make the project more flexible." +msgstr "附加配置是 vLLM 提供的一种机制,允许插件自行控制内部行为。VLLM Ascend 利用此机制使项目更加灵活。" -#: ../../user_guide/configuration/additional_config.md:5 +#: ../../source/user_guide/configuration/additional_config.md:5 msgid "How to use" -msgstr "如何使用" +msgstr "使用方法" -#: ../../user_guide/configuration/additional_config.md:7 +#: ../../source/user_guide/configuration/additional_config.md:7 msgid "" "With either online mode or offline mode, users can use additional " "configuration. 
Take Qwen3 as an example:" -msgstr "无论是在线模式还是离线模式,用户都可以使用额外的配置。以 Qwen3 为例:" +msgstr "无论是在线模式还是离线模式,用户都可以使用附加配置。以 Qwen3 为例:" -#: ../../user_guide/configuration/additional_config.md:9 +#: ../../source/user_guide/configuration/additional_config.md:9 msgid "**Online mode**:" msgstr "**在线模式**:" -#: ../../user_guide/configuration/additional_config.md:15 +#: ../../source/user_guide/configuration/additional_config.md:15 msgid "**Offline mode**:" msgstr "**离线模式**:" -#: ../../user_guide/configuration/additional_config.md:23 +#: ../../source/user_guide/configuration/additional_config.md:23 msgid "Configuration options" msgstr "配置选项" -#: ../../user_guide/configuration/additional_config.md:25 +#: ../../source/user_guide/configuration/additional_config.md:25 msgid "" -"The following table lists the additional configuration options available in " +"The following table lists additional configuration options available in " "vLLM Ascend:" -msgstr "下表列出了 vLLM Ascend 中可用的其他配置选项:" +msgstr "下表列出了 vLLM Ascend 中可用的附加配置选项:" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "Name" msgstr "名称" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "Type" msgstr "类型" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "Default" -msgstr "默认" +msgstr "默认值" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "Description" msgstr "描述" -#: ../../user_guide/configuration/additional_config.md -msgid "`torchair_graph_config`" -msgstr "`torchair_graph_config`" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`xlite_graph_config`" +msgstr "`xlite_graph_config`" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "dict" msgstr "dict" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md #, python-brace-format msgid "`{}`" msgstr "`{}`" -#: ../../user_guide/configuration/additional_config.md -msgid "The config options for torchair graph mode" -msgstr "torchair 图模式的配置选项" +#: ../../source/user_guide/configuration/additional_config.md +msgid "Configuration options for Xlite graph mode" +msgstr "Xlite 图模式的配置选项" -#: ../../user_guide/configuration/additional_config.md -msgid "`ascend_scheduler_config`" -msgstr "`ascend_scheduler_config`" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`weight_prefetch_config`" +msgstr "`weight_prefetch_config`" -#: ../../user_guide/configuration/additional_config.md -msgid "The config options for ascend scheduler" -msgstr "ascend 调度器的配置选项" +#: ../../source/user_guide/configuration/additional_config.md +msgid "Configuration options for weight prefetch" +msgstr "权重预取的配置选项" -#: ../../user_guide/configuration/additional_config.md -msgid "`expert_tensor_parallel_size`" -msgstr "`expert_tensor_parallel_size`" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`finegrained_tp_config`" +msgstr "`finegrained_tp_config`" -#: ../../user_guide/configuration/additional_config.md -msgid "str" -msgstr "str" +#: ../../source/user_guide/configuration/additional_config.md +msgid "Configuration options for module tensor parallelism" +msgstr "模块张量并行的配置选项" -#: ../../user_guide/configuration/additional_config.md -msgid "`0`" -msgstr "`0`" 
+#: ../../source/user_guide/configuration/additional_config.md +msgid "`ascend_compilation_config`" +msgstr "`ascend_compilation_config`" -#: ../../user_guide/configuration/additional_config.md -msgid "Expert tensor parallel size the model to use." -msgstr "专家张量并行的模型大小设置。" +#: ../../source/user_guide/configuration/additional_config.md +msgid "Configuration options for ascend compilation" +msgstr "昇腾编译的配置选项" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md +msgid "`eplb_config`" +msgstr "`eplb_config`" + +#: ../../source/user_guide/configuration/additional_config.md msgid "`refresh`" -msgstr "`刷新`" +msgstr "`refresh`" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "bool" msgstr "bool" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "`false`" msgstr "`false`" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "" -"Whether to refresh global ascend config content. This value is usually used " -"by rlhf or ut/e2e test case." -msgstr "是否刷新全局 ascend 配置信息。此值通常由 rlhf 或 ut/e2e 测试用例使用。" +"Whether to refresh global Ascend configuration content. This is usually " +"used by rlhf or ut/e2e test case." +msgstr "是否刷新全局 Ascend 配置内容。通常由 RLHF 或 UT/E2E 测试用例使用。" -#: ../../user_guide/configuration/additional_config.md -msgid "`expert_map_path`" -msgstr "`expert_map_path`" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`dump_config_path`" +msgstr "`dump_config_path`" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md +msgid "str" +msgstr "str" + +#: ../../source/user_guide/configuration/additional_config.md msgid "`None`" msgstr "`None`" -#: ../../user_guide/configuration/additional_config.md -msgid "" -"When using expert load balancing for the MOE model, an expert map path needs" -" to be passed in." -msgstr "在为MOE模型使用专家负载均衡时,需要传入专家映射路径。" +#: ../../source/user_guide/configuration/additional_config.md +msgid "Configuration file path for msprobe dump(eager mode)." +msgstr "msprobe dump(eager 模式)的配置文件路径。" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md +msgid "`enable_async_exponential`" +msgstr "`enable_async_exponential`" + +#: ../../source/user_guide/configuration/additional_config.md msgid "`False`" msgstr "`False`" -#: ../../user_guide/configuration/additional_config.md -msgid "Whether to enable the fused operator-like chunked_prefill." -msgstr "是否启用类似算子融合的 chunked_prefill 功能。" - -#: ../../user_guide/configuration/additional_config.md -msgid "`kv_cache_dtype`" -msgstr "`kv_cache_dtype`" - -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "" -"When using the kv cache quantization method, kv cache dtype needs to be set," -" currently only int8 is supported." -msgstr "当使用kv缓存量化方法时,需要设置kv缓存的数据类型,目前仅支持int8。" +"Whether to enable asynchronous exponential overlap. To enable " +"asynchronous exponential, set this config to True." 
+msgstr "是否启用异步指数重叠。要启用异步指数,请将此配置设置为 True。" -#: ../../user_guide/configuration/additional_config.md:37 -msgid "The details of each config option are as follows:" -msgstr "每个配置选项的详细信息如下:" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`enable_shared_expert_dp`" +msgstr "`enable_shared_expert_dp`" -#: ../../user_guide/configuration/additional_config.md:39 -msgid "**torchair_graph_config**" -msgstr "**torchair_graph_config**" - -#: ../../user_guide/configuration/additional_config.md -msgid "`enabled`" -msgstr "`启用`" - -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "" -"Whether to enable torchair graph mode. Currently only DeepSeek series models" -" and PanguProMoE are supported to use torchair graph mode" -msgstr "是否启用 torchair 图模式。目前仅支持 DeepSeek 系列模型和 PanguProMoE 使用 torchair 图模式。" +"When the expert is shared in DP, it delivers better performance but " +"consumes more memory. Currently only DeepSeek series models are " +"supported." +msgstr "当专家在 DP 中共享时,可获得更好的性能但会消耗更多内存。目前仅支持 DeepSeek 系列模型。" -#: ../../user_guide/configuration/additional_config.md -msgid "`enable_multistream_mla`" -msgstr "`enable_multistream_mla`" - -#: ../../user_guide/configuration/additional_config.md -msgid "" -"Whether to put vector ops of MLA to another stream. This option only takes " -"effects on models using MLA (e.g., DeepSeek)." -msgstr "是否将MLA的向量操作放到另一个流中。此选项仅对使用MLA的模型(例如,DeepSeek)有效。" - -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "`multistream_overlap_shared_expert`" msgstr "`multistream_overlap_shared_expert`" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "" -"Whether to enable multistream shared expert. This option only takes effects " -"on DeepSeek moe models." -msgstr "是否启用多流共享专家功能。此选项仅对 DeepSeek MoE 模型生效。" +"Whether to enable multi-stream shared expert. This option only takes " +"effect on MoE models with shared experts." +msgstr "是否启用多流共享专家。此选项仅对具有共享专家的 MoE 模型生效。" -#: ../../user_guide/configuration/additional_config.md -msgid "`enable_view_optimize`" -msgstr "`enable_view_optimize` (启用视图优化)" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`multistream_overlap_gate`" +msgstr "`multistream_overlap_gate`" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Whether to enable multi-stream overlap gate. This option only takes " +"effect on MoE models with shared experts." +msgstr "是否启用多流重叠门。此选项仅对具有共享专家的 MoE 模型生效。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`recompute_scheduler_enable`" +msgstr "`recompute_scheduler_enable`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Whether to enable recompute scheduler." +msgstr "是否启用重计算调度器。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`enable_cpu_binding`" +msgstr "`enable_cpu_binding`" + +#: ../../source/user_guide/configuration/additional_config.md msgid "`True`" msgstr "`True`" -#: ../../user_guide/configuration/additional_config.md -msgid "Whether to enable torchair view optimization" -msgstr "是否启用torchair视图优化" +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Whether to enable CPU binding. 
Only takes effect on ARM CPUs; A3 uses the" +" global-slicing CPU allocation strategy and other device types use the " +"topo-affinity CPU allocation strategy." +msgstr "是否启用 CPU 绑定。仅在 ARM CPU 上生效;A3 使用全局切片 CPU 分配策略,其他设备类型使用拓扑亲和性 CPU 分配策略。" -#: ../../user_guide/configuration/additional_config.md -msgid "`use_cached_graph`" -msgstr "`use_cached_graph`" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`SLO_limits_for_dynamic_batch`" +msgstr "`SLO_limits_for_dynamic_batch`" -#: ../../user_guide/configuration/additional_config.md -msgid "Whether to use cached graph" -msgstr "是否使用缓存的图" +#: ../../source/user_guide/configuration/additional_config.md +msgid "int" +msgstr "int" -#: ../../user_guide/configuration/additional_config.md -msgid "`graph_batch_sizes`" -msgstr "`graph_batch_sizes`" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`-1`" +msgstr "`-1`" -#: ../../user_guide/configuration/additional_config.md -msgid "list[int]" -msgstr "list[int]" +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"SLO limits for dynamic batch. This is new scheduler to support dynamic " +"batch feature" +msgstr "动态批处理的 SLO 限制。这是支持动态批处理功能的新调度器。" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md +msgid "`enable_npugraph_ex`" +msgstr "`enable_npugraph_ex`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Whether to enable npugraph_ex graph mode." +msgstr "是否启用 npugraph_ex 图模式。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`pa_shape_list`" +msgstr "`pa_shape_list`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "list" +msgstr "list" + +#: ../../source/user_guide/configuration/additional_config.md msgid "`[]`" msgstr "`[]`" -#: ../../user_guide/configuration/additional_config.md -msgid "The batch size for torchair graph cache" -msgstr "torchair 图缓存的批量大小" +#: ../../source/user_guide/configuration/additional_config.md +msgid "The custom shape list of page attention ops." +msgstr "页面注意力算子的自定义形状列表。" -#: ../../user_guide/configuration/additional_config.md -msgid "`graph_batch_sizes_init`" -msgstr "`graph_batch_sizes_init`" - -#: ../../user_guide/configuration/additional_config.md -msgid "Init graph batch size dynamically if `graph_batch_sizes` is empty" -msgstr "如果 `graph_batch_sizes` 为空,则动态初始化图批大小" - -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "`enable_kv_nz`" msgstr "`enable_kv_nz`" -#: ../../user_guide/configuration/additional_config.md +#: ../../source/user_guide/configuration/additional_config.md msgid "" -"Whether to enable kvcache NZ layout. This option only takes effects on " +"Whether to enable KV cache NZ layout. This option only takes effects on " "models using MLA (e.g., DeepSeek)." 
-msgstr "是否启用 kvcache NZ 布局。此选项仅对使用 MLA 的模型(例如 DeepSeek)生效。" +msgstr "是否启用 KV 缓存 NZ 布局。此选项仅对使用 MLA 的模型(例如 DeepSeek)生效。" -#: ../../user_guide/configuration/additional_config.md:52 -msgid "**ascend_scheduler_config**" -msgstr "**ascend_scheduler_config**" +#: ../../source/user_guide/configuration/additional_config.md +msgid "`layer_sharding`" +msgstr "`layer_sharding`" -#: ../../user_guide/configuration/additional_config.md -msgid "Whether to enable ascend scheduler for V1 engine" -msgstr "是否为 V1 引擎启用 ascend 调度器" - -#: ../../user_guide/configuration/additional_config.md:58 +#: ../../source/user_guide/configuration/additional_config.md msgid "" -"ascend_scheduler_config also support the options from [vllm scheduler " -"config](https://docs.vllm.ai/en/stable/api/vllm/config.html#vllm.config.SchedulerConfig)." -" For example, you can add `enable_chunked_prefill: True` to " -"ascend_scheduler_config as well." -msgstr "" -"ascend_scheduler_config 也支持来自 [vllm scheduler " -"config](https://docs.vllm.ai/en/stable/api/vllm/config.html#vllm.config.SchedulerConfig)" -" 的选项。例如,你也可以在 ascend_scheduler_config 中添加 `enable_chunked_prefill: True`。" +"Configuration options for Layer Sharding Linear. In PD-disaggregated " +"deployments, it is supported only on P nodes with " +"`kv_role=\"kv_producer\"`." +msgstr "层分片线性层的配置选项。在 PD 解耦部署中,仅支持在 `kv_role=\"kv_producer\"` 的 P 节点上使用。" -#: ../../user_guide/configuration/additional_config.md:60 +#: ../../source/user_guide/configuration/additional_config.md +msgid "`enable_sparse_c8`" +msgstr "`enable_sparse_c8`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Whether to enable KV cache C8 in DSA models (e.g., DeepSeekV3.2 and " +"GLM5). Not supported on A5 devices now" +msgstr "是否在 DSA 模型(例如 DeepSeekV3.2 和 GLM5)中启用 KV 缓存 C8。目前 A5 设备不支持。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`enable_mc2_hierarchy_comm`" +msgstr "`enable_mc2_hierarchy_comm`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Enable dispatch/combine op inter-node communication by ROCE." +msgstr "通过 ROCE 启用分发/组合算子的节点间通信。" + +#: ../../source/user_guide/configuration/additional_config.md:50 +msgid "The details of each configuration option are as follows:" +msgstr "每个配置选项的详细信息如下:" + +#: ../../source/user_guide/configuration/additional_config.md:52 +msgid "**xlite_graph_config**" +msgstr "**xlite_graph_config**" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`enabled`" +msgstr "`enabled`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Whether to enable Xlite graph mode. Currently only Llama, Qwen dense " +"series models, and Qwen3-VL are supported." +msgstr "是否启用 Xlite 图模式。目前仅支持 Llama、Qwen 稠密系列模型和 Qwen3-VL。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`full_mode`" +msgstr "`full_mode`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Whether to enable Xlite for both the prefill and decode stages. By " +"default, Xlite is only enabled for the decode stage." +msgstr "是否在预填充和解码阶段都启用 Xlite。默认情况下,Xlite 仅对解码阶段启用。" + +#: ../../source/user_guide/configuration/additional_config.md:59 +msgid "**weight_prefetch_config**" +msgstr "**weight_prefetch_config**" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Whether to enable weight prefetch." 
+msgstr "是否启用权重预取。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`prefetch_ratio`" +msgstr "`prefetch_ratio`" + +#: ../../source/user_guide/configuration/additional_config.md +#, python-brace-format +msgid "" +"`{\"attn\": {\"qkv\": 1.0, \"o\": 1.0}, \"moe\": {\"gate_up\": 0.8}, " +"\"mlp\": { \"gate_up\": 1.0, \"down\": 1.0}}`" +msgstr "`{\"attn\": {\"qkv\": 1.0, \"o\": 1.0}, \"moe\": {\"gate_up\": 0.8}, \"mlp\": { \"gate_up\": 1.0, \"down\": 1.0}}`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Prefetch ratio of each weight." +msgstr "各权重的预取比例。" + +#: ../../source/user_guide/configuration/additional_config.md:66 +msgid "**finegrained_tp_config**" +msgstr "**finegrained_tp_config**" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`lmhead_tensor_parallel_size`" +msgstr "`lmhead_tensor_parallel_size`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`0`" +msgstr "`0`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "The custom tensor parallel size of lm_head." +msgstr "lm_head 的自定义张量并行大小。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`oproj_tensor_parallel_size`" +msgstr "`oproj_tensor_parallel_size`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "The custom tensor parallel size of o_proj." +msgstr "o_proj 的自定义张量并行大小。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`embedding_tensor_parallel_size`" +msgstr "`embedding_tensor_parallel_size`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "The custom tensor parallel size of embedding." +msgstr "embedding 的自定义张量并行大小。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`mlp_tensor_parallel_size`" +msgstr "`mlp_tensor_parallel_size`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "The custom tensor parallel size of mlp." +msgstr "mlp 的自定义张量并行大小。" + +#: ../../source/user_guide/configuration/additional_config.md:75 +msgid "**ascend_compilation_config**" +msgstr "**ascend_compilation_config**" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Whether to enable npugraph_ex backend." +msgstr "是否启用 npugraph_ex 后端。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`enable_static_kernel`" +msgstr "`enable_static_kernel`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Whether to enable static kernel. Suitable for scenarios where shape " +"changes are minimal and some time is available for static kernel " +"compilation." +msgstr "是否启用静态内核。适用于形状变化极小且有时间为静态内核编译的场景。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`fuse_norm_quant`" +msgstr "`fuse_norm_quant`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Whether to enable fuse_norm_quant pass." +msgstr "是否启用 fuse_norm_quant 优化过程。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`fuse_qknorm_rope`" +msgstr "`fuse_qknorm_rope`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Whether to enable fuse_qknorm_rope pass. If Triton is not in the " +"environment, set it to False." 
+msgstr "是否启用 fuse_qknorm_rope 优化过程。如果环境中没有 Triton,请将其设置为 False。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`fuse_allreduce_rms`" +msgstr "`fuse_allreduce_rms`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Whether to enable fuse_allreduce_rms pass. It's set to False because of " +"conflict with SP." +msgstr "是否启用 fuse_allreduce_rms 优化过程。由于与 SP 冲突,默认设置为 False。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`fuse_muls_add`" +msgstr "`fuse_muls_add`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Whether to enable fuse_muls_add pass." +msgstr "是否启用 fuse_muls_add 优化过程。" + +#: ../../source/user_guide/configuration/additional_config.md:86 +msgid "**eplb_config**" +msgstr "**eplb_config**" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`dynamic_eplb`" +msgstr "`dynamic_eplb`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Whether to enable dynamic EPLB." +msgstr "是否启用动态 EPLB。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`expert_map_path`" +msgstr "`expert_map_path`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"When using expert load balancing for an MoE model, an expert map path " +"needs to be passed in." +msgstr "为 MoE 模型使用专家负载均衡时,需要传入专家映射路径。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`expert_heat_collection_interval`" +msgstr "`expert_heat_collection_interval`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`400`" +msgstr "`400`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Forward iterations when EPLB begins." +msgstr "EPLB 开始时的前向传播迭代次数。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`algorithm_execution_interval`" +msgstr "`algorithm_execution_interval`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`30`" +msgstr "`30`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "The forward iterations when the EPLB worker will finish CPU tasks." +msgstr "EPLB 工作进程完成 CPU 任务所需的前向传播迭代次数。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`expert_map_record_path`" +msgstr "`expert_map_record_path`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "" +"Save the expert load calculation results to a new expert table in the " +"specified directory." +msgstr "将专家负载计算结果保存到指定目录下的新专家表中。" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "`num_redundant_experts`" +msgstr "`num_redundant_experts`" + +#: ../../source/user_guide/configuration/additional_config.md +msgid "Specify redundant experts during initialization." +msgstr "在初始化时指定冗余专家数量。" + +#: ../../source/user_guide/configuration/additional_config.md:97 msgid "Example" msgstr "示例" -#: ../../user_guide/configuration/additional_config.md:62 +#: ../../source/user_guide/configuration/additional_config.md:99 msgid "An example of additional configuration is as follows:" msgstr "以下是额外配置的一个示例:" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po new file mode 100644 index 00000000..48bb76d4 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po @@ -0,0 +1,25 @@ +# SOME DESCRIPTIVE TITLE. 
+# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/deployment_guide/index.md:1 +#: ../../source/user_guide/deployment_guide/index.md:3 +msgid "Deployment Guide" +msgstr "部署指南" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po new file mode 100644 index 00000000..1361c854 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po @@ -0,0 +1,293 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:1 +msgid "Using Volcano Kthena" +msgstr "使用 Volcano Kthena" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:3 +msgid "" +"This guide shows how to run **prefill–decode (PD) disaggregation** on " +"Huawei Ascend NPUs using **vLLM-Ascend**, with " +"[**Kthena**](https://kthena.volcano.sh/) handling orchestration on " +"Kubernetes. About vLLM support with Kthena, please refer to [Deploy vLLM " +"with " +"Kthena](https://docs.vllm.ai/en/latest/deployment/integrations/kthena/)." +msgstr "" +"本指南展示了如何在华为昇腾 NPU 上使用 **vLLM-Ascend** 运行**预填充-解码(PD)解耦**,并由 " +"[**Kthena**](https://kthena.volcano.sh/) 在 Kubernetes 上处理编排。关于 vLLM 与 Kthena 的集成支持,请参阅[使用 " +"Kthena 部署 vLLM](https://docs.vllm.ai/en/latest/deployment/integrations/kthena/)。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:7 +msgid "1. What is Prefill–Decode Disaggregation?" +msgstr "1. 什么是预填充-解码解耦?" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:9 +msgid "Large language model inference naturally splits into two phases:" +msgstr "大语言模型推理自然分为两个阶段:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:11 +msgid "**Prefill**" +msgstr "**预填充**" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:12 +msgid "Processes input tokens and builds the key–value (KV) cache." +msgstr "处理输入令牌并构建键值(KV)缓存。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:13 +msgid "Batch-friendly, high-throughput, well-suited to parallel NPU execution." 
+msgstr "批处理友好,高吞吐量,非常适合并行 NPU 执行。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:14 +msgid "**Decode**" +msgstr "**解码**" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:15 +msgid "Consumes the KV cache to generate output tokens." +msgstr "消耗 KV 缓存以生成输出令牌。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:16 +msgid "Latency-sensitive, memory-intensive, more sequential." +msgstr "延迟敏感,内存密集型,更具顺序性。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:18 +msgid "" +"From the client's perspective, this still looks like a single Chat / " +"Completions endpoint." +msgstr "从客户端的角度来看,这仍然像一个单一的聊天/补全端点。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:22 +msgid "2. Deploy on Kubernetes with Kthena" +msgstr "2. 使用 Kthena 在 Kubernetes 上部署" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:24 +msgid "" +"[Kthena](https://kthena.volcano.sh/) is a Kubernetes-native LLM inference" +" platform that transforms how organizations deploy and manage Large " +"Language Models in production. Built with declarative model lifecycle " +"management and intelligent request routing, it provides high-performance " +"and enterprise-grade scalability for LLM inference workloads. In this " +"example, we use three key Custom Resource Definitions (CRDs):" +msgstr "" +"[Kthena](https://kthena.volcano.sh/) 是一个 Kubernetes 原生的 LLM 推理平台,它改变了组织在生产环境中部署和管理大语言模型的方式。它基于声明式模型生命周期管理和智能请求路由构建,为 LLM 推理工作负载提供高性能和企业级的可扩展性。在本示例中,我们使用三个关键的自定义资源定义(CRD):" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:26 +msgid "`ModelServing` — defines the workloads (prefill and decode roles)." +msgstr "`ModelServing` — 定义工作负载(预填充和解码角色)。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:27 +msgid "`ModelServer` — manages PD groupings and internal routing." +msgstr "`ModelServer` — 管理 PD 分组和内部路由。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:28 +msgid "`ModelRoute` — exposes a stable model endpoint." +msgstr "`ModelRoute` — 暴露一个稳定的模型端点。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:30 +msgid "" +"This section uses the `deepseek-ai/DeepSeek-V2-Lite` example, but you can" +" swap in any model supported by vLLM-Ascend." +msgstr "本节使用 `deepseek-ai/DeepSeek-V2-Lite` 示例,但您可以替换为 vLLM-Ascend 支持的任何模型。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:32 +msgid "2.1 Prerequisites" +msgstr "2.1 先决条件" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:34 +msgid "Kubernetes cluster with Ascend NPU nodes:" +msgstr "包含昇腾 NPU 节点的 Kubernetes 集群:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:36 +msgid "" +"The resources corresponding to different NPU Drivers may vary slightly. " +"For example:" +msgstr "不同 NPU 驱动对应的资源可能略有不同。例如:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:38 +#, python-format +msgid "" +"If using [MindCluster](https://gitee.com/ascend/mind-" +"cluster#https://gitee.com/link?target=https%3A%2F%2Fgitcode.com%2FAscend" +"%2Fmind-cluster), please use `huawei.com/Ascend310P` or " +"`huawei.com/Ascend910`." 
+msgstr "" +"如果使用 [MindCluster](https://gitee.com/ascend/mind-" +"cluster#https://gitee.com/link?target=https%3A%2F%2Fgitcode.com%2FAscend%2Fmind-cluster),请使用 " +"`huawei.com/Ascend310P` 或 `huawei.com/Ascend910`。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:40 +msgid "" +"If running on CCE (Cloud Container Engine) of Huawei Cloud and the [CCE " +"AI Suite Plugin (Ascend NPU)](https://support.huaweicloud.com/intl/en-us" +"/usermanual-cce/cce_10_0239.html) is installed, please use " +"`huawei.com/ascend-310` or `huawei.com/ascend-1980`." +msgstr "" +"如果在华为云的 CCE(云容器引擎)上运行并且安装了 [CCE AI 套件插件(昇腾 " +"NPU)](https://support.huaweicloud.com/intl/en-us/usermanual-cce/cce_10_0239.html),请使用 " +"`huawei.com/ascend-310` 或 `huawei.com/ascend-1980`。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:42 +msgid "" +"Kthena installed. Please follow the [Kthena installation " +"guide](https://kthena.volcano.sh/docs/getting-started/installation)." +msgstr "已安装 Kthena。请遵循 [Kthena 安装指南](https://kthena.volcano.sh/docs/getting-started/installation)。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:44 +msgid "2.2 Deploy Prefill-Decode Disaggregated DeepSeek-V2-Lite on Kubernetes" +msgstr "2.2 在 Kubernetes 上部署预填充-解码解耦的 DeepSeek-V2-Lite" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:46 +msgid "" +"A concrete example is provided in Kthena as " +msgstr "" +"Kthena 中提供了一个具体示例:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:48 +msgid "Deploy it with the command below:" +msgstr "使用以下命令部署:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:54 +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:315 +msgid "or" +msgstr "或" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:293 +msgid "You should see Pods such as:" +msgstr "您应该会看到类似以下的 Pod:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:295 +msgid "`deepseek-v2-lite-0-prefill-0-0`" +msgstr "`deepseek-v2-lite-0-prefill-0-0`" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:296 +msgid "`deepseek-v2-lite-0-decode-0-0`" +msgstr "`deepseek-v2-lite-0-decode-0-0`" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:298 +msgid "" +"To enable the LLM access, we still need to configure the routing layer " +"with `ModelServer` and `ModelRoute`." +msgstr "要启用 LLM 访问,我们仍然需要使用 `ModelServer` 和 `ModelRoute` 配置路由层。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:300 +msgid "2.3 ModelServer: PD Group Management" +msgstr "2.3 ModelServer:PD 分组管理" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:302 +msgid "The `ModelServer` resource:" +msgstr "`ModelServer` 资源:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:304 +msgid "Selects the `ModelServing` workloads via labels." +msgstr "通过标签选择 `ModelServing` 工作负载。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:305 +msgid "Groups prefill and decode Pods into PD pairs." +msgstr "将预填充和解码 Pod 分组为 PD 对。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:306 +msgid "Configures KV connector details and timeouts." +msgstr "配置 KV 连接器详细信息和超时设置。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:307 +msgid "Exposes an internal gRPC/HTTP interface." 
+msgstr "暴露一个内部的 gRPC/HTTP 接口。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:309 +msgid "Create ModelServer with the command below:" +msgstr "使用以下命令创建 ModelServer:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:345 +msgid "2.4 ModelRoute: User-Facing Endpoint" +msgstr "2.4 ModelRoute:面向用户的端点" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:347 +msgid "" +"The `ModelRoute` resource maps a model name (e.g., `\"deepseek-" +"ai/DeepSeekV2\"`) to the `ModelServer`." +msgstr "`ModelRoute` 资源将模型名称(例如 `\"deepseek-ai/DeepSeekV2\"`)映射到 `ModelServer`。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:349 +msgid "Example manifest:" +msgstr "示例清单:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:369 +msgid "3. Verification" +msgstr "3. 验证" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:371 +msgid "3.1 Check Workloads" +msgstr "3.1 检查工作负载" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:373 +msgid "Confirm that prefill and decode Pods are up:" +msgstr "确认预填充和解码 Pod 已启动:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:382 +msgid "You should see both roles in `Running` and `Ready` state." +msgstr "您应该看到两个角色都处于 `Running` 和 `Ready` 状态。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:384 +msgid "3.2 Test the Chat Endpoint" +msgstr "3.2 测试聊天端点" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:386 +msgid "" +"Once routing is configured, you can send a test request to the Kthena-" +"router:" +msgstr "路由配置完成后,您可以向 Kthena-router 发送测试请求:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:406 +msgid "A successful JSON response confirms that:" +msgstr "成功的 JSON 响应确认了:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:408 +msgid "The prefill and decode services are both running on Ascend NPUs." +msgstr "预填充和解码服务都在昇腾 NPU 上运行。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:409 +msgid "KV transfer between them is working." +msgstr "它们之间的 KV 传输正常工作。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:410 +msgid "The Kthena routing layer is correctly fronting the vLLM-Ascend plugin." +msgstr "Kthena 路由层正确地作为 vLLM-Ascend 插件的前端。" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:414 +msgid "4. Cleanup" +msgstr "4. 清理" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:416 +msgid "To remove the deployment:" +msgstr "要移除部署:" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:431 +msgid "5. Summary" +msgstr "5. 总结" + +#: ../../source/user_guide/deployment_guide/using_volcano_kthena.md:433 +msgid "" +"For more advanced features, please refer to the [Kthena " +"website](https://kthena.volcano.sh/)." +msgstr "有关更多高级功能,请参阅 [Kthena 网站](https://kthena.volcano.sh/)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po new file mode 100644 index 00000000..1119975f --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po @@ -0,0 +1,307 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:1 +msgid "Fine-Grained Tensor Parallelism (Finegrained TP)" +msgstr "细粒度张量并行 (Finegrained TP)" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:5 +msgid "" +"Fine-Grained Tensor Parallelism (Fine-grained TP) extends standard tensor" +" parallelism by enabling **independent tensor-parallel sizes for " +"different model components**. Instead of applying a single global " +"`tensor_parallel_size` to all layers, Fine-grained TP allows users to " +"configure separate TP sizes for key modules—such as embedding, language " +"model head (lm_head), attention output projection (o_proj), and MLP " +"blocks—via the `finegrained_tp_config` parameter." +msgstr "" +"细粒度张量并行 (Fine-grained TP) 扩展了标准张量并行,允许为**不同的模型组件设置独立的张量并行规模**。与对所有层应用单一的全局 `tensor_parallel_size` 不同,细粒度 TP 允许用户通过 `finegrained_tp_config` 参数为关键模块(如嵌入层、语言模型头部 (lm_head)、注意力输出投影层 (o_proj) 和 MLP 块)配置独立的 TP 规模。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:7 +msgid "" +"This capability supports heterogeneous parallelism strategies within a " +"single model, providing finer control over weight distribution, memory " +"layout, and communication patterns across devices. The feature is " +"compatible with standard dense transformer architectures and integrates " +"seamlessly into vLLM’s serving pipeline." +msgstr "" +"此功能支持在单个模型内使用异构并行策略,从而能更精细地控制跨设备的权重分布、内存布局和通信模式。该特性与标准的密集 Transformer 架构兼容,并能无缝集成到 vLLM 的服务流水线中。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:11 +msgid "Benefits of Finegrained TP" +msgstr "细粒度 TP 的优势" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:13 +msgid "" +"Fine-Grained Tensor Parallelism delivers two primary performance " +"advantages through targeted weight sharding:" +msgstr "细粒度张量并行通过有针对性的权重分片带来两个主要的性能优势:" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:15 +msgid "" +"**Reduced Per-Device Memory Footprint**: Fine-grained TP shards large " +"weight matrices(e.g., LM Head, o_proj)across devices, lowering peak " +"memory usage and enabling larger batches or deployment on memory-limited " +"hardware—without quantization." +msgstr "" +"**降低单设备内存占用**: 细粒度 TP 将大型权重矩阵(例如 LM Head、o_proj)分片到多个设备上,降低了峰值内存使用量,从而支持更大的批次或在内存受限的硬件上进行部署——无需量化。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:18 +msgid "" +"**Faster Memory Access in GEMMs**: In decode-heavy workloads, GEMM " +"performance is often memory-bound. Weight sharding reduces per-device " +"weight fetch volume, cutting DRAM traffic and improving bandwidth " +"efficiency—especially for latency-sensitive layers like LM Head and " +"o_proj." 
+msgstr "" +"**加速 GEMM 中的内存访问**: 在解码密集型工作负载中,GEMM 性能通常受内存带宽限制。权重分片减少了每个设备需要获取的权重数据量,从而降低了 DRAM 流量并提高了带宽效率——对于 LM Head 和 o_proj 等延迟敏感层尤其如此。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:21 +msgid "" +"Together, these effects allow practitioners to better balance memory, " +"communication, and compute—particularly in high-concurrency serving " +"scenarios—while maintaining compatibility with standard dense transformer" +" models." +msgstr "综合来看,这些效果使实践者能够更好地平衡内存、通信和计算——尤其是在高并发服务场景中——同时保持与标准密集 Transformer 模型的兼容性。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:25 +msgid "Supported Scenarios" +msgstr "支持场景" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:27 +msgid "Models" +msgstr "模型" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:29 +msgid "" +"Fine-grained TP is **model-agnostic** and supports all standard dense " +"transformer architectures, including Llama, Qwen, DeepSeek (base/dense " +"variants), and others." +msgstr "细粒度 TP 是**模型无关的**,支持所有标准的密集 Transformer 架构,包括 Llama、Qwen、DeepSeek(基础/密集变体)等。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:31 +msgid "Component & Execution Mode Support" +msgstr "组件与执行模式支持" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "TP config" +msgstr "TP 配置" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "Eager" +msgstr "Eager" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "Graph" +msgstr "Graph" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "Hybrid" +msgstr "Hybrid" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "Prefill" +msgstr "Prefill" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "Decode" +msgstr "Decode" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**embedding**" +msgstr "**embedding**" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "✅" +msgstr "✅" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**o_proj**" +msgstr "**o_proj**" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "❌" +msgstr "❌" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**mlp**" +msgstr "**mlp**" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**LMhead**" +msgstr "**LMhead**" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:40 +msgid "⚠️ Note:" +msgstr "⚠️ 注意:" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:42 +msgid "" +"`o_proj` TP is only supported in Graph mode during Decode, because " +"dummy_run in eager mode will not trigger o_proj." +msgstr "`o_proj` TP 仅在 Decode 阶段的 Graph 模式下受支持,因为 eager 模式下的 dummy_run 不会触发 o_proj。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:43 +msgid "" +"`mlp` TP supports dense models, or dense layers in MoE models. For " +"example, the first three dense layers of DeepSeek-R1." 
+msgstr "`mlp` TP 支持密集模型,或 MoE 模型中的密集层。例如,DeepSeek-R1 的前三个密集层。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:45 +msgid "Configuration Limit" +msgstr "配置限制" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:47 +msgid "The Fine-Grained TP size for any component must:" +msgstr "任何组件的细粒度 TP 规模必须满足:" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:49 +msgid "Be **≤ the data-parallel (DP) size**, and" +msgstr "**≤ 数据并行 (DP) 规模**,并且" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:50 +msgid "" +"**Evenly divide the DP size** (i.e., `dp_size % tp_size == 0`) to ensure " +"valid device assignment and communication grouping." +msgstr "**能整除 DP 规模**(即 `dp_size % tp_size == 0`),以确保有效的设备分配和通信分组。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:52 +msgid "" +"⚠️ Violating these constraints will result in runtime errors or undefined" +" behavior." +msgstr "⚠️ 违反这些约束将导致运行时错误或未定义行为。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:56 +msgid "How to Use Finegrained TP" +msgstr "如何使用细粒度 TP" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:58 +msgid "Configuration Format" +msgstr "配置格式" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:60 +msgid "" +"Fine-grained TP is controlled via the `finegrained_tp_config` field " +"inside `--additional-config`." +msgstr "细粒度 TP 通过 `--additional-config` 内的 `finegrained_tp_config` 字段控制。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:73 +msgid "Example Usage" +msgstr "使用示例" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:91 +msgid "Experimental Results" +msgstr "实验结果" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:93 +msgid "" +"To evaluate the effectiveness of fine-grained TP in large-scale service " +"scenarios, we use the model **DeepSeek-R1-W8A8**, deploy PD separated " +"decode instances in an environment of 32 cards Ascend 910B*64G (A2), with" +" parallel configuration as DP32+EP32, and fine-grained TP size of 8; the " +"performance data is as follows." 
+msgstr "为评估细粒度 TP 在大规模服务场景中的有效性,我们使用模型 **DeepSeek-R1-W8A8**,在 32 卡 Ascend 910B*64G (A2) 环境中部署 PD 分离的解码实例,并行配置为 DP32+EP32,细粒度 TP 规模为 8;性能数据如下。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "Module" +msgstr "模块" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "Memory Savings" +msgstr "内存节省" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "TPOT Impact (batch=24)" +msgstr "TPOT 影响 (batch=24)" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "o_proj TP = 8" +msgstr "o_proj TP = 8" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "5.8 GB" +msgstr "5.8 GB" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**+1.5 ms** (degradation)" +msgstr "**+1.5 ms** (性能下降)" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "LM head TP = 8" +msgstr "LM head TP = 8" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "1.51 GB" +msgstr "1.51 GB" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**−1.2 ms** (improvement)" +msgstr "**−1.2 ms** (性能提升)" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "FFN TP = 8" +msgstr "FFN TP = 8" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "0.9 GB" +msgstr "0.9 GB" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**−1.0 ms** (improvement)" +msgstr "**−1.0 ms** (性能提升)" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "Embedding TP = 8" +msgstr "Embedding TP = 8" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**Total**" +msgstr "**总计**" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "**9.72 GB**" +msgstr "**9.72 GB**" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md +msgid "—" +msgstr "—" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:103 +msgid "" +"We achieved significant gains in terms of high memory capacity on a " +"single card, as well as the benefits of TPOT." +msgstr "我们在单卡高内存容量以及 TPOT 优势方面取得了显著收益。" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:107 +msgid "✅ Deployment Recommendations" +msgstr "✅ 部署建议" + +#: ../../source/user_guide/feature_guide/Fine_grained_TP.md:109 +msgid "" +"Fine-grained TP is the **most effective** in the **decode instance** of " +"PD separation, where models are typically deployed in all-DP mode. In " +"this setup, sharding weight-heavy layers reduces redundant storage and " +"memory pressure." +msgstr "细粒度 TP 在 PD 分离的**解码实例**中**最有效**,因为模型通常以全 DP 模式部署。在此设置中,对权重密集的层进行分片可以减少冗余存储和内存压力。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po new file mode 100644 index 00000000..6c45f4a6 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po @@ -0,0 +1,233 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:1 +msgid "Multi Token Prediction (MTP)" +msgstr "多令牌预测 (MTP)" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:3 +msgid "Why We Need MTP" +msgstr "为何需要 MTP" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:5 +msgid "" +"MTP boosts inference performance by parallelizing the prediction of " +"multiple tokens, shifting from single-token to multi-token generation. " +"This approach significantly increases generation throughput and achieves " +"multiplicative acceleration in inference speed—all without compromising " +"output quality." +msgstr "" +"MTP 通过并行预测多个令牌来提升推理性能,从单令牌生成转向多令牌生成。这种方法显著提高了生成吞吐量,并在不牺牲输出质量的前提下,实现了推理速度的倍增加速。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:7 +msgid "How to Use MTP" +msgstr "如何使用 MTP" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:9 +msgid "" +"To enable MTP for DeepSeek-V3 models, add the following parameter when " +"starting the service:" +msgstr "要为 DeepSeek-V3 模型启用 MTP,请在启动服务时添加以下参数:" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:11 +#, python-brace-format +msgid "" +"--speculative_config ' {\"method\": \"mtp\", \"num_speculative_tokens\": " +"1, \"disable_padded_drafter_batch\": False} '" +msgstr "" +"--speculative_config ' {\"method\": \"mtp\", \"num_speculative_tokens\": " +"1, \"disable_padded_drafter_batch\": False} '" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:13 +msgid "" +"`num_speculative_tokens`: The number of speculative tokens that enables " +"the model to predict multiple tokens at once, if provided. It will " +"default to the number in the draft model config if present, otherwise, it" +" is required." +msgstr "" +"`num_speculative_tokens`:推测性令牌的数量,如果提供,则使模型能够一次预测多个令牌。如果草稿模型配置中存在此值,则默认使用该值,否则必须提供。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:14 +msgid "" +"`disable_padded_drafter_batch`: Disable input padding for speculative " +"decoding. If set to True, speculative input batches can contain sequences" +" of different lengths, which may only be supported by certain attention " +"backends. This currently only affects the MTP method of speculation, " +"default is False." +msgstr "" +"`disable_padded_drafter_batch`:禁用推测解码的输入填充。如果设置为 True,推测输入批次可以包含不同长度的序列,这可能仅受某些注意力后端支持。目前这仅影响 MTP 推测方法,默认值为 False。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:16 +msgid "How It Works" +msgstr "工作原理" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:18 +msgid "Module Architecture" +msgstr "模块架构" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:29 +msgid "**1. sample**" +msgstr "**1. 采样**" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:31 +msgid "" +"*rejection_sample.py*: During decoding, the main model processes the " +"previous round’s output token and the predicted token together (computing" +" 1+k tokens simultaneously). 
The first token is always correct, while the" +" second token—referred to as the **bonus token**—is uncertain since it is" +" derived from speculative prediction, thus we employ **Greedy Strategy** " +"and **Rejection Sampling Strategy** to determine whether the bonus token " +"should be accepted. The module structure consists of an " +"`AscendRejectionSampler` class with a forward method that implements the " +"specific sampling logic." +msgstr "" +"*rejection_sample.py*:在解码过程中,主模型同时处理上一轮的输出令牌和预测的令牌(同时计算 1+k 个令牌)。第一个令牌总是正确的,而第二个令牌(称为**奖励令牌**)则不确定,因为它源自推测性预测,因此我们采用**贪婪策略**和**拒绝采样策略**来决定是否应接受该奖励令牌。该模块结构包含一个 `AscendRejectionSampler` 类,其 forward 方法实现了具体的采样逻辑。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:39 +msgid "**2. spec_decode**" +msgstr "**2. spec_decode**" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:41 +msgid "" +"This section encompasses the model preprocessing for spec-decode, " +"primarily structured as follows: it includes loading the model, executing" +" a dummy run, and generating token IDs. These steps collectively form the" +" model data construction and forward invocation for a single spec-decode " +"operation." +msgstr "本节涵盖了 spec-decode 的模型预处理,主要结构如下:包括加载模型、执行虚拟运行以及生成令牌 ID。这些步骤共同构成了单次 spec-decode 操作的模型数据构建和前向调用。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:43 +msgid "" +"*mtp_proposer.py*: Configure vLLM-Ascend to use speculative decoding " +"where proposals are generated by DeepSeek MTP layer." +msgstr "*mtp_proposer.py*:配置 vLLM-Ascend 使用推测解码,其中提议由 DeepSeek MTP 层生成。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:54 +msgid "Algorithm" +msgstr "算法" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:56 +msgid "**1. Rejection Sampling**" +msgstr "**1. 拒绝采样**" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:58 +msgid "*Greedy Strategy*" +msgstr "*贪婪策略*" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:60 +msgid "" +"Verify whether the token generated by the main model matches the " +"speculative token predicted by MTP in the previous round. If they match " +"exactly, accept the bonus token; otherwise, reject it and any subsequent " +"tokens derived from that speculation." +msgstr "验证主模型生成的令牌是否与上一轮 MTP 预测的推测令牌匹配。如果完全匹配,则接受奖励令牌;否则,拒绝该令牌以及源自该推测的任何后续令牌。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:62 +msgid "*Rejection Sampling Strategy*" +msgstr "*拒绝采样策略*" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:64 +msgid "This method introduces stochasticity in rejection sampling." +msgstr "此方法在拒绝采样中引入了随机性。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:66 +msgid "" +"For each draft token, acceptance is determined by verifying whether the " +"inequality `P_target / P_draft ≥ U` holds, where `P_target` represents " +"the probability assigned to the current draft token by the target model, " +"`P_draft` denotes the probability assigned by the draft model, and `U` is" +" a random number sampled uniformly from the interval [0, 1)." 
+msgstr "对于每个草稿令牌,通过验证不等式 `P_target / P_draft ≥ U` 是否成立来决定是否接受,其中 `P_target` 表示目标模型分配给当前草稿令牌的概率,`P_draft` 表示草稿模型分配的概率,`U` 是从区间 [0, 1) 均匀采样的随机数。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:68 +msgid "" +"The decision logic for each draft token is as follows: if the inequality " +"`P_target / P_draft ≥ U` holds, the draft token is accepted as output; " +"conversely, if `P_target / P_draft < U`, the draft token is rejected." +msgstr "每个草稿令牌的决策逻辑如下:如果不等式 `P_target / P_draft ≥ U` 成立,则草稿令牌被接受作为输出;反之,如果 `P_target / P_draft < U`,则草稿令牌被拒绝。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:70 +msgid "" +"When a draft token is rejected, a recovery sampling process is triggered " +"where a \"recovered token\" is resampled from the adjusted probability " +"distribution defined as `Q = max(P_target - P_draft, 0)`. In the current " +"MTP implementation, since `P_draft` is not provided and defaults to 1, " +"the formulas simplify such that token acceptance occurs when `P_target ≥ " +"U` and the recovery distribution becomes `Q = max(P_target - 1, 0)`." +msgstr "当草稿令牌被拒绝时,会触发恢复采样过程,从调整后的概率分布 `Q = max(P_target - P_draft, 0)` 中重新采样一个“恢复令牌”。在当前 MTP 实现中,由于未提供 `P_draft` 且默认为 1,公式简化为:当 `P_target ≥ U` 时令牌被接受,恢复分布变为 `Q = max(P_target - 1, 0)`。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:72 +msgid "**2. Performance**" +msgstr "**2. 性能**" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:74 +msgid "" +"If the bonus token is accepted, the MTP model performs inference for " +"(num_speculative + 1) tokens, including original main model output token " +"and bonus token. If rejected, inference is performed for fewer tokens, " +"depending on how many tokens are accepted." +msgstr "如果奖励令牌被接受,MTP 模型将对 (num_speculative + 1) 个令牌执行推理,包括原始主模型输出令牌和奖励令牌。如果被拒绝,则根据接受了多少个令牌来执行更少令牌的推理。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:76 +msgid "DFX" +msgstr "DFX" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:78 +msgid "Method Validation" +msgstr "方法验证" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:80 +msgid "" +"Currently, the spec_decode scenario only supports methods such as n-gram," +" EAGLE, EAGLE3, and MTP. If an incorrect parameter is passed for the " +"method, the code will raise an error to alert the user that an incorrect " +"method was provided." +msgstr "目前,spec_decode 场景仅支持 n-gram、EAGLE、EAGLE3 和 MTP 等方法。如果为方法传递了错误的参数,代码将引发错误以提醒用户提供了不正确的方法。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:98 +msgid "Integer Validation" +msgstr "整数验证" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:100 +msgid "" +"The current npu_fused_infer_attention_score operator only supports " +"integers less than 16 per decode round. Therefore, the maximum supported " +"value for MTP is 15. If a value greater than 15 is provided, the code " +"will raise an error and alert the user." +msgstr "当前的 npu_fused_infer_attention_score 算子每轮解码仅支持小于 16 的整数。因此,MTP 支持的最大值为 15。如果提供了大于 15 的值,代码将引发错误并提醒用户。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:111 +msgid "Limitations" +msgstr "限制" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:113 +msgid "" +"Due to the fact that only a single layer of weights is exposed in " +"DeepSeek's MTP, the accuracy and performance are not effectively " +"guaranteed in scenarios where MTP > 1 (especially MTP ≥ 3). 
Moreover, due" +" to current operator limitations, MTP supports a maximum of 15." +msgstr "由于 DeepSeek 的 MTP 仅暴露了单层权重,因此在 MTP > 1(尤其是 MTP ≥ 3)的场景下,准确性和性能无法得到有效保证。此外,由于当前算子限制,MTP 最多支持 15。" + +#: ../../source/user_guide/feature_guide/Multi_Token_Prediction.md:114 +msgid "" +"In the fullgraph mode with MTP > 1, the capture size of each ACLGraph " +"must be an integer multiple of (num_speculative_tokens + 1)." +msgstr "在 MTP > 1 的 fullgraph 模式下,每个 ACLGraph 的捕获大小必须是 (num_speculative_tokens + 1) 的整数倍。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/batch_invariance.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/batch_invariance.po new file mode 100644 index 00000000..85d60477 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/batch_invariance.po @@ -0,0 +1,214 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:1 +msgid "Batch Invariance" +msgstr "批次不变性" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:4 +msgid "" +"Batch invariance is currently in beta. Some features are still under " +"active development. Track progress and planned improvements at " +"" +msgstr "" +"批次不变性功能目前处于测试阶段。部分功能仍在积极开发中。请通过 " +" 跟踪进展和计划改进。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:8 +msgid "" +"This document shows how to enable batch invariance in vLLM-Ascend. Batch " +"invariance ensures that the output of a model is deterministic and " +"independent of the batch size or the order of requests in a batch." +msgstr "" +"本文档介绍如何在 vLLM-Ascend 中启用批次不变性。批次不变性确保模型的输出是确定性的,且不依赖于批次大小或批次中请求的顺序。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:10 +msgid "Motivation" +msgstr "动机" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:12 +msgid "Batch invariance is crucial for several use cases:" +msgstr "批次不变性对于以下几个用例至关重要:" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:14 +msgid "" +"**Framework debugging**: Deterministic outputs make it easier to debug " +"issues in the inference framework, as the same input will always produce " +"the same output regardless of batching." +msgstr "" +"**框架调试**:确定性输出使得调试推理框架中的问题更加容易,因为无论批处理方式如何,相同的输入总是产生相同的输出。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:15 +msgid "" +"**Model debugging**: Helps identify issues in model implementations by " +"ensuring consistent behavior across different batch configurations." +msgstr "**模型调试**:通过确保在不同批次配置下行为一致,帮助识别模型实现中的问题。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:16 +msgid "" +"**Reinforcement Learning (RL)**: RL training often requires deterministic" +" rollouts for reproducibility and stable training." 
+msgstr "**强化学习 (RL)**:RL 训练通常需要确定性的推演过程,以确保可复现性和稳定的训练。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:17 +msgid "" +"**Large-scale inference systems**: Systems that use vLLM as a component " +"benefit from deterministic behavior for testing, validation, and " +"consistency guarantees." +msgstr "**大规模推理系统**:将 vLLM 作为组件的系统受益于确定性行为,便于测试、验证和保证一致性。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:19 +msgid "Hardware Requirements" +msgstr "硬件要求" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:21 +msgid "" +"Batch invariance currently requires Ascend 910B NPUs, because only the " +"910B supports batch invariance with HCCL communication for now. We will " +"support other NPUs in the future." +msgstr "" +"批次不变性目前需要 Ascend 910B NPU,因为目前只有 910B 支持通过 HCCL 通信实现批次不变性。我们未来将支持其他 NPU。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:24 +msgid "Software Requirements" +msgstr "软件要求" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:26 +msgid "" +"Batch invariance requires a customed operator library for 910B. We will " +"release the customed operator library in future versions." +msgstr "批次不变性需要为 910B 定制的算子库。我们将在未来版本中发布该定制算子库。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:29 +msgid "Enabling Batch Invariance" +msgstr "启用批次不变性" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:31 +msgid "" +"Batch invariance can be enabled by setting the `VLLM_BATCH_INVARIANT` " +"environment variable to `1`:" +msgstr "可以通过将环境变量 `VLLM_BATCH_INVARIANT` 设置为 `1` 来启用批次不变性:" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:37 +msgid "Online Inference (Server Mode)" +msgstr "在线推理(服务器模式)" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:39 +msgid "To start a vLLM server with batch invariance enabled:" +msgstr "要启动一个启用了批次不变性的 vLLM 服务器:" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:45 +msgid "Then use the OpenAI-compatible client:" +msgstr "然后使用 OpenAI 兼容的客户端:" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:68 +msgid "Offline Inference" +msgstr "离线推理" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:70 +msgid "For offline batch inference with batch invariance:" +msgstr "对于启用批次不变性的离线批处理推理:" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:105 +msgid "Tested Models" +msgstr "已测试模型" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:107 +msgid "Batch invariance has been tested and verified on the following models:" +msgstr "批次不变性已在以下模型上经过测试和验证:" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:109 +msgid "**Qwen3 (Dense)**: `Qwen/Qwen3-1.7B`, `Qwen/Qwen3-8B`" +msgstr "**Qwen3 (稠密模型)**:`Qwen/Qwen3-1.7B`, `Qwen/Qwen3-8B`" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:110 +msgid "**Qwen3 (MoE)**: `Qwen/Qwen3-30B-A3B`" +msgstr "**Qwen3 (MoE 模型)**:`Qwen/Qwen3-30B-A3B`" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:112 +msgid "" +"Other models may also work, but these have been explicitly validated. If " +"you encounter issues with a specific model, please report them on the " +"[GitHub issue tracker](https://github.com/vllm-project/vllm-" +"ascend/issues/new/choose)." 
+msgstr "" +"其他模型也可能适用,但上述模型已明确经过验证。如果您在使用特定模型时遇到问题,请在 [GitHub 问题跟踪器](https://github.com/vllm-project/vllm-ascend/issues/new/choose) 上报告。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:114 +msgid "Implementation Details" +msgstr "实现细节" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:116 +msgid "When batch invariance is enabled, vLLM:" +msgstr "当启用批次不变性时,vLLM 会:" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:118 +msgid "" +"Uses deterministic kernel implementations for attention and other " +"operations" +msgstr "对注意力机制和其他操作使用确定性的内核实现" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:119 +msgid "Ensures consistent numerical behavior across different batch sizes" +msgstr "确保在不同批次大小下具有一致的数值行为" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:120 +msgid "Disables certain optimizations that may introduce non-determinism" +msgstr "禁用某些可能引入非确定性的优化" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:123 +msgid "" +"Enabling batch invariance may impact performance compared to the default " +"non-deterministic mode. This trade-off is intentional to guarantee " +"reproducibility." +msgstr "与默认的非确定性模式相比,启用批次不变性可能会影响性能。这种权衡是为了保证可复现性而有意为之。" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:126 +msgid "Future Improvements" +msgstr "未来改进" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:128 +msgid "" +"The batch invariance feature is under active development. Planned " +"improvements include:" +msgstr "批次不变性功能正在积极开发中。计划的改进包括:" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:130 +msgid "Support for additional NPUs series" +msgstr "支持更多 NPU 系列" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:131 +msgid "Expanded model coverage" +msgstr "扩大模型覆盖范围" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:132 +msgid "Performance optimizations" +msgstr "性能优化" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:133 +msgid "Additional testing and validation" +msgstr "额外的测试和验证" + +#: ../../source/user_guide/feature_guide/batch_invariance.md:135 +msgid "" +"For the latest status and to contribute ideas, see the [tracking " +"issue](https://github.com/vllm-project/vllm-ascend/issues/5487)." +msgstr "有关最新状态和贡献想法,请参阅 [跟踪问题](https://github.com/vllm-project/vllm-ascend/issues/5487)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po new file mode 100644 index 00000000..66f7df62 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po @@ -0,0 +1,299 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/context_parallel.md:1 +msgid "Context Parallel Guide" +msgstr "上下文并行指南" + +#: ../../source/user_guide/feature_guide/context_parallel.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/context_parallel.md:5 +msgid "" +"This guide shows how to use Context Parallel, a long sequence inference " +"optimization technique. Context Parallel includes `PCP` (Prefill Context " +"Parallel) and `DCP` (Decode Context Parallel), which reduces NPU memory " +"usage and improves inference speed in long sequence LLM inference." +msgstr "" +"本指南介绍如何使用上下文并行(Context Parallel),一种长序列推理优化技术。上下文并行包括 `PCP`(预填充上下文并行)和 `DCP`(解码上下文并行),可减少长序列LLM推理中的NPU内存使用并提升推理速度。" + +#: ../../source/user_guide/feature_guide/context_parallel.md:7 +msgid "Benefits of Context Parallel" +msgstr "上下文并行的优势" + +#: ../../source/user_guide/feature_guide/context_parallel.md:9 +msgid "" +"Context parallel mainly solves the problem of serving long context " +"requests. As prefill and decode present quite different characteristics " +"and have quite different SLO (service level objectives), we need to " +"implement context parallel separately for them. The major considerations " +"are:" +msgstr "" +"上下文并行主要解决服务长上下文请求的问题。由于预填充和解码阶段具有截然不同的特性以及不同的服务级别目标(SLO),我们需要分别为它们实现上下文并行。主要考虑点如下:" + +#: ../../source/user_guide/feature_guide/context_parallel.md:11 +msgid "" +"For long context prefill, we can use context parallel to reduce TTFT " +"(time to first token) by amortizing the computation time of the prefill " +"across query tokens." +msgstr "" +"对于长上下文预填充,我们可以使用上下文并行,通过将预填充的计算时间分摊到查询令牌上,从而减少首令牌时间(TTFT)。" + +#: ../../source/user_guide/feature_guide/context_parallel.md:12 +msgid "" +"For long context decode, we can use context parallel to reduce KV cache " +"duplication and offer more space for KV cache to increase the batch size " +"(and hence the throughput)." +msgstr "" +"对于长上下文解码,我们可以使用上下文并行来减少KV缓存的重复存储,为KV缓存提供更多空间,从而增加批处理大小(进而提升吞吐量)。" + +#: ../../source/user_guide/feature_guide/context_parallel.md:14 +msgid "" +"To learn more about the theory and implementation details of context " +"parallel, please refer to the [context parallel developer " +"guide](../../developer_guide/Design_Documents/context_parallel.md)." +msgstr "" +"要了解更多关于上下文并行的理论和实现细节,请参阅[上下文并行开发者指南](../../developer_guide/Design_Documents/context_parallel.md)。" + +#: ../../source/user_guide/feature_guide/context_parallel.md:16 +msgid "Supported Scenarios" +msgstr "支持场景" + +#: ../../source/user_guide/feature_guide/context_parallel.md:18 +msgid "" +"Currently context parallel can be used together with most other features," +" supported features are as follows:" +msgstr "目前上下文并行可与大多数其他功能结合使用,支持的功能如下:" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Eager" +msgstr "Eager模式" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Graph" +msgstr "Graph模式" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Prefix
Cache" +msgstr "前缀
缓存" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Chunked
Prefill" +msgstr "分块
预填充" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "SpecDecode
(MTP)" +msgstr "推测解码
(MTP)" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "PD
disaggregation" +msgstr "PD
解耦" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "MLAPO" +msgstr "MLAPO" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "**PCP**" +msgstr "**PCP**" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "✅" +msgstr "✅" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "**DCP**" +msgstr "**DCP**" + +#: ../../source/user_guide/feature_guide/context_parallel.md:25 +msgid "How to use Context Parallel" +msgstr "如何使用上下文并行" + +#: ../../source/user_guide/feature_guide/context_parallel.md:27 +msgid "" +"You can enable `PCP` and `DCP` by `prefill_context_parallel_size` and " +"`decode_context_parallel_size`, refer to the following example:" +msgstr "您可以通过 `prefill_context_parallel_size` 和 `decode_context_parallel_size` 启用 `PCP` 和 `DCP`,请参考以下示例:" + +#: ../../source/user_guide/feature_guide/context_parallel.md:29 +msgid "Offline example:" +msgstr "离线示例:" + +#: ../../source/user_guide/feature_guide/context_parallel.md:48 +msgid "Online example:" +msgstr "在线示例:" + +#: ../../source/user_guide/feature_guide/context_parallel.md:57 +msgid "" +"The total world size is `tensor_parallel_size` * " +"`prefill_context_parallel_size`, so the examples above need 4 NPUs for " +"each." +msgstr "总的世界大小为 `tensor_parallel_size` * `prefill_context_parallel_size`,因此上述示例各需要4个NPU。" + +#: ../../source/user_guide/feature_guide/context_parallel.md:59 +msgid "Constraints" +msgstr "约束条件" + +#: ../../source/user_guide/feature_guide/context_parallel.md:61 +msgid "While using DCP, the following constraints must be met:" +msgstr "使用DCP时,必须满足以下约束条件:" + +#: ../../source/user_guide/feature_guide/context_parallel.md:62 +msgid "For MLA-based model, such as DeepSeek-R1:" +msgstr "对于基于MLA的模型,例如DeepSeek-R1:" + +#: ../../source/user_guide/feature_guide/context_parallel.md:63 +msgid "`tensor_parallel_size >= decode_context_parallel_size`" +msgstr "`tensor_parallel_size >= decode_context_parallel_size`" + +#: ../../source/user_guide/feature_guide/context_parallel.md:64 +#, python-format +msgid "`tensor_parallel_size % decode_context_parallel_size == 0`" +msgstr "`tensor_parallel_size % decode_context_parallel_size == 0`" + +#: ../../source/user_guide/feature_guide/context_parallel.md:65 +msgid "For GQA-based model, such as Qwen3-235B:" +msgstr "对于基于GQA的模型,例如Qwen3-235B:" + +#: ../../source/user_guide/feature_guide/context_parallel.md:66 +msgid "" +"`(tensor_parallel_size // num_key_value_heads) >= " +"decode_context_parallel_size`" +msgstr "`(tensor_parallel_size // num_key_value_heads) >= decode_context_parallel_size`" + +#: ../../source/user_guide/feature_guide/context_parallel.md:67 +#, python-format +msgid "" +"`(tensor_parallel_size // num_key_value_heads) % " +"decode_context_parallel_size == 0`" +msgstr "`(tensor_parallel_size // num_key_value_heads) % decode_context_parallel_size == 0`" + +#: ../../source/user_guide/feature_guide/context_parallel.md:69 +msgid "" +"While using Context Parallel in KV cache transfer-needed scenario (e.g. " +"KV pooling, PD disaggregation), to simplify KV cache transmission, " +"`cp_kv_cache_interleave_size` must be set to the same value of KV cache " +"`block_size`(default: 128), which specifies CP to split KV cache in a " +"block-interleave style. 
For example:" +msgstr "" +"在需要KV缓存传输的场景(例如KV池化、PD解耦)中使用上下文并行时,为简化KV缓存传输,必须将 `cp_kv_cache_interleave_size` 设置为与KV缓存 `block_size`(默认:128)相同的值,这指定了CP以块交错方式分割KV缓存。例如:" + +#: ../../source/user_guide/feature_guide/context_parallel.md:80 +msgid "Experimental Results" +msgstr "实验结果" + +#: ../../source/user_guide/feature_guide/context_parallel.md:82 +msgid "" +"To evaluate the effectiveness of Context Parallel in long sequence LLM " +"inference scenarios, we use **DeepSeek-R1-W8A8** and **Qwen3-235B**, " +"deploy PD disaggregate instances in the environment of 64 cards Ascend " +"910C*64G (A3), the configuration and performance data are as follows." +msgstr "" +"为评估上下文并行在长序列LLM推理场景中的有效性,我们使用 **DeepSeek-R1-W8A8** 和 **Qwen3-235B**,在64卡Ascend 910C*64G(A3)环境中部署PD解耦实例,配置和性能数据如下。" + +#: ../../source/user_guide/feature_guide/context_parallel.md:84 +msgid "DeepSeek-R1-W8A8:" +msgstr "DeepSeek-R1-W8A8:" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Configuration" +msgstr "配置" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Input length
32k" +msgstr "输入长度
32k" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Input length
64k" +msgstr "输入长度
64k" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Input length
128k" +msgstr "输入长度
128k" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "P node: (DP2 TP8 EP16) *2
D node: (DP32 EP32)*1" +msgstr "P节点: (DP2 TP8 EP16) *2
D节点: (DP32 EP32)*1" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 9.3s
TPOT: 72ms" +msgstr "TTFT: 9.3s
TPOT: 72ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 22.8s
TPOT: 74ms" +msgstr "TTFT: 22.8s
TPOT: 74ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 73.2s
TPOT: 82ms" +msgstr "TTFT: 73.2s
TPOT: 82ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "P node: (PCP2 TP8 DCP8 EP16) *2
D node: (DP32 EP32)*1" +msgstr "P节点: (PCP2 TP8 DCP8 EP16) *2
D节点: (DP32 EP32)*1" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 7.9s
TPOT: 74ms" +msgstr "TTFT: 7.9s
TPOT: 74ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 15.9s
TPOT: 78ms" +msgstr "TTFT: 15.9s
TPOT: 78ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 46.0s
TPOT: 83ms" +msgstr "TTFT: 46.0s
TPOT: 83ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md:91 +msgid "Qwen3-235B:" +msgstr "Qwen3-235B:" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "Input length
120k" +msgstr "输入长度
120k" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 5.1s
TPOT: 65ms" +msgstr "TTFT: 5.1s
TPOT: 65ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 13.1s
TPOT: 85ms" +msgstr "TTFT: 13.1s
TPOT: 85ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 33.9s
TPOT: 120ms" +msgstr "TTFT: 33.9s
TPOT: 120ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "P node: (PCP2 TP8 DCP2 EP16) *2
D node: (DP32 EP32)*1" +msgstr "P节点: (PCP2 TP8 DCP2 EP16) *2
D节点: (DP32 EP32)*1" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 3.0s
TPOT: 66ms" +msgstr "TTFT: 3.0s
TPOT: 66ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 8.9s
TPOT: 86ms" +msgstr "TTFT: 8.9s
TPOT: 86ms" + +#: ../../source/user_guide/feature_guide/context_parallel.md +msgid "TTFT: 22.7s
TPOT: 121ms" +msgstr "TTFT: 22.7s
TPOT: 121ms" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/cpu_binding.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/cpu_binding.po new file mode 100644 index 00000000..132fbbbf --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/cpu_binding.po @@ -0,0 +1,284 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:1 +msgid "CPU Binding" +msgstr "CPU 绑定" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:5 +msgid "" +"CPU Binding is a performance optimization feature for vLLM, specifically " +"designed for servers equipped with **ARM architecture and Ascend NPUs**. " +"It pins vLLM processes and threads to specific CPU cores to reduce " +"CPU–NPU cross‑NUMA communication overhead and stabilize inference " +"latency. This feature only adjusts host-side CPU affinity policies and " +"**does not alter model execution logic or impact inference results**." +msgstr "" +"CPU 绑定是 vLLM 的一项性能优化功能,专为配备 **ARM 架构和昇腾 NPU** 的服务器设计。它将 vLLM 进程和线程固定到特定的 CPU 核心,以减少 CPU-NPU 跨 NUMA 通信开销并稳定推理延迟。此功能仅调整主机端的 CPU 亲和性策略,**不会改变模型执行逻辑或影响推理结果**。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:7 +msgid "Usage" +msgstr "使用方法" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:9 +msgid "Online serving example with CPU binding enabled (by default)" +msgstr "启用 CPU 绑定的在线服务示例(默认)" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:16 +msgid "Online serving example with CPU binding disabled" +msgstr "禁用 CPU 绑定的在线服务示例" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:23 +msgid "Offline inference example with CPU binding enabled" +msgstr "启用 CPU 绑定的离线推理示例" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:34 +msgid "Offline inference example with CPU binding disabled" +msgstr "禁用 CPU 绑定的离线推理示例" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:45 +msgid "Dependencies" +msgstr "依赖项" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:47 +msgid "Installation" +msgstr "安装" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:49 +msgid "Ubuntu/Debian" +msgstr "Ubuntu/Debian" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:56 +msgid "RHEL/CentOS/Alma/Rocky" +msgstr "RHEL/CentOS/Alma/Rocky" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:62 +msgid "openEuler" +msgstr "openEuler" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:68 +msgid "IRQ binding's additional considerations" +msgstr "IRQ 绑定的额外注意事项" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:70 +msgid "" +"For best results, if you run inside a docker container, which `systemctl`" +" is likely unavailable, stop `irqbalance` service on the host manually " +"before starting vLLM. 
Also make sure the container has the necessary " +"permissions to write to `/proc/irq/*/smp_affinity` for IRQ binding:" +msgstr "" +"为获得最佳效果,如果您在 Docker 容器内运行(容器内可能没有 `systemctl`),请在启动 vLLM 前手动在主机上停止 `irqbalance` 服务。同时确保容器具有写入 `/proc/irq/*/smp_affinity` 以进行 IRQ 绑定所需的权限:" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:72 +msgid "**Stop `irqbalance` service**:" +msgstr "**停止 `irqbalance` 服务**:" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:74 +msgid "" +"For example, on Ubuntu system, you can run the following command to stop " +"irqbalance:" +msgstr "例如,在 Ubuntu 系统上,您可以运行以下命令来停止 irqbalance:" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:79 +msgid "After you finish the vLLM process, you can restore irqbalance on the host:" +msgstr "完成 vLLM 进程后,您可以在主机上恢复 irqbalance:" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:85 +msgid "**Permissions**:" +msgstr "**权限**:" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:86 +msgid "Read access to `/proc/self/status` and `/proc/interrupts`" +msgstr "对 `/proc/self/status` 和 `/proc/interrupts` 的读取权限" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:87 +msgid "Write access to `/proc/irq/*/smp_affinity` for IRQ binding" +msgstr "对 `/proc/irq/*/smp_affinity` 的写入权限(用于 IRQ 绑定)" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:89 +msgid "Common Issues & Troubleshooting" +msgstr "常见问题与故障排除" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "Error/Warning Message" +msgstr "错误/警告信息" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "Core Cause" +msgstr "核心原因" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "Solution" +msgstr "解决方案" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "Can not get running npu info." +msgstr "无法获取运行的 NPU 信息。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "" +"The npu-smi process table is empty, or the `ASCEND_RT_VISIBLE_DEVICES` " +"environment variable filters out all NPUs." +msgstr "npu-smi 进程表为空,或者 `ASCEND_RT_VISIBLE_DEVICES` 环境变量过滤掉了所有 NPU。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "" +"1. Ensure the process is running on visible NPUs; 2. Verify that the " +"`ASCEND_RT_VISIBLE_DEVICES` value matches the actual logical NPU IDs." +msgstr "1. 确保进程在可见的 NPU 上运行;2. 验证 `ASCEND_RT_VISIBLE_DEVICES` 的值与实际逻辑 NPU ID 匹配。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "Insufficient CPUs for binding..." +msgstr "用于绑定的 CPU 不足..." + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "" +"The number of CPU cores allocated to each NPU is less than the minimum " +"requirement of 5." +msgstr "分配给每个 NPU 的 CPU 核心数少于最低要求 5 个。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "1. Expand the allowed CPU list; 2. Reduce the number of visible NPUs." +msgstr "1. 扩展允许的 CPU 列表;2. 减少可见 NPU 的数量。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "NPU topo affinity not found..." +msgstr "未找到 NPU 拓扑亲和性..." + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "npu-smi is unable to retrieve NPU topology affinity information." +msgstr "npu-smi 无法检索 NPU 拓扑亲和性信息。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "" +"Verify the integrity of the npu-smi installation and ensure the user has " +"sufficient execution permissions." +msgstr "验证 npu-smi 安装的完整性,并确保用户具有足够的执行权限。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "Bind cpus failed in rankX..." 
+msgstr "在 rankX 中绑定 CPU 失败..." + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "" +"The CPU binding process failed (e.g., taskset is unavailable, or the user" +" lacks write permissions for /proc/irq)." +msgstr "CPU 绑定过程失败(例如,taskset 不可用,或用户缺少对 /proc/irq 的写入权限)。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md +msgid "" +"1. Confirm that required tools (taskset, lscpu, npu-smi) are installed " +"and available; 2. Verify the Cpus_allowed_list in `/proc/self/status` is " +"valid." +msgstr "1. 确认所需工具(taskset, lscpu, npu-smi)已安装且可用;2. 验证 `/proc/self/status` 中的 Cpus_allowed_list 是有效的。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:98 +msgid "Key Limitations" +msgstr "主要限制" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:100 +msgid "ARM architecture only: Binding is automatically skipped on x86_64 systems." +msgstr "仅限 ARM 架构:在 x86_64 系统上会自动跳过绑定。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:102 +msgid "" +"Symmetric NUMA layout required for optimal performance: CPU numbering " +"should be aligned with NUMA nodes. Non-symmetric layouts may result in " +"cross-NUMA CPU pools, reducing locality." +msgstr "需要对称的 NUMA 布局以获得最佳性能:CPU 编号应与 NUMA 节点对齐。非对称布局可能导致跨 NUMA 的 CPU 池,降低局部性。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:104 +msgid "" +"IRQ binding requires write permissions for /proc/irq. Memory binding " +"depends on the `migratepages` tool; if unavailable, memory migration is " +"skipped." +msgstr "IRQ 绑定需要对 /proc/irq 的写入权限。内存绑定依赖于 `migratepages` 工具;如果不可用,则跳过内存迁移。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:106 +msgid "FAQ" +msgstr "常见问题解答" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:108 +msgid "**Q1: Does CPU binding work on x86_64?**" +msgstr "**Q1: CPU 绑定在 x86_64 上有效吗?**" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:110 +msgid "No. The binding is skipped on non‑ARM CPUs." +msgstr "否。在非 ARM CPU 上会跳过绑定。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:112 +msgid "**Q2: Why are only the current rank’s IRQs bound?**" +msgstr "**Q2: 为什么只绑定当前 rank 的 IRQ?**" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:114 +msgid "" +"To avoid multiple processes overwriting IRQ affinity settings for the " +"same device." +msgstr "为了避免多个进程覆盖同一设备的 IRQ 亲和性设置。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:116 +msgid "**Q3: What if my cpuset already limits CPUs?**" +msgstr "**Q3: 如果我的 cpuset 已经限制了 CPU 怎么办?**" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:118 +msgid "" +"The binder uses Cpus_allowed_list from /proc/self/status as the only " +"eligible CPU set. Ensure this list is large enough." +msgstr "绑定器使用来自 /proc/self/status 的 Cpus_allowed_list 作为唯一符合条件的 CPU 集合。请确保此列表足够大。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:120 +msgid "**Q4: Does CPU binding change model outputs?**" +msgstr "**Q4: CPU 绑定会改变模型输出吗?**" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:122 +msgid "" +"No. It only affects host‑side affinity and should not change numerical " +"results." +msgstr "不会。它只影响主机端的亲和性,不应改变数值结果。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:126 +msgid "Summary" +msgstr "总结" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:128 +msgid "" +"**Core Objective**: Reduce cross‑NUMA communication by pinning vLLM " +"processes and threads to specific CPU cores, thereby stabilizing " +"inference latency in Ascend NPU deployments (only applicable to ARM " +"architectures)." 
+msgstr "**核心目标**:通过将 vLLM 进程和线程固定到特定的 CPU 核心来减少跨 NUMA 通信,从而稳定昇腾 NPU 部署中的推理延迟(仅适用于 ARM 架构)。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:130 +msgid "" +"**Usage**: Enable or disable with `enable_cpu_binding` via " +"`additional_config` in both online and offline workflows." +msgstr "**使用方法**:在在线和离线工作流中,通过 `additional_config` 中的 `enable_cpu_binding` 参数启用或禁用。" + +#: ../../source/user_guide/feature_guide/cpu_binding.md:132 +msgid "" +"**Key Limitations**: ARM‑only; relies on symmetric NUMA layouts; binding " +"fails if the CPU pool has fewer than 5 cores; binding errors trigger a " +"warning log but do not terminate the process." +msgstr "**主要限制**:仅限 ARM;依赖于对称的 NUMA 布局;如果 CPU 池少于 5 个核心,绑定会失败;绑定错误会触发警告日志但不会终止进程。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po new file mode 100644 index 00000000..bf73b1d3 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po @@ -0,0 +1,108 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:1 +msgid "Dynamic Batch" +msgstr "动态批处理" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:3 +msgid "" +"Dynamic batch is a technique that dynamically adjusts the chunksize " +"during each inference iteration within the chunked prefilling strategy " +"according to the resources and SLO targets, thereby improving the " +"effective throughput and decreasing the TBT." +msgstr "" +"动态批处理是一种技术,它根据资源和SLO目标,在分块预填充策略的每次推理迭代中动态调整块大小,从而提高有效吞吐量并降低TBT。" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:5 +msgid "" +"Dynamic batch is controlled by the value of the " +"`--SLO_limits_for_dynamic_batch`. Notably, only 910 B3 is supported with " +"decode token number scales below 2048 so far. Especially, the " +"improvements are quite obvious on Qwen, Llama models. We are working on " +"further improvements and this feature will support more XPUs in the " +"future." +msgstr "" +"动态批处理由 `--SLO_limits_for_dynamic_batch` 参数的值控制。值得注意的是,目前仅支持910 B3,且解码token数量规模需低于2048。特别是在Qwen、Llama模型上,改进效果相当明显。我们正在进行进一步的改进,该功能未来将支持更多XPU。" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:10 +msgid "Getting started" +msgstr "快速开始" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:12 +msgid "Prerequisites" +msgstr "先决条件" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:14 +msgid "" +"Dynamic batch now depends on an offline cost model saved in a lookup " +"table to refine the token budget. 
The lookup table is saved in a '.csv' " +"file, which should be first downloaded from [A2-B3-BLK128.csv](https" +"://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-" +"ascend/dynamic_batch_scheduler/A2-B3-BLK128.csv), renamed, and saved to " +"the path `vllm_ascend/core/profile_table.csv`" +msgstr "" +"动态批处理目前依赖于一个保存在查找表中的离线成本模型来优化token预算。该查找表保存在一个'.csv'文件中,需要先从[A2-B3-BLK128.csv](https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/dynamic_batch_scheduler/A2-B3-BLK128.csv)下载,重命名后保存到路径 `vllm_ascend/core/profile_table.csv`。" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:16 +msgid "" +"`Pandas` is needed to load the lookup table, in case pandas is not " +"installed." +msgstr "需要 `Pandas` 来加载查找表,以防未安装pandas。" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:22 +msgid "Tuning Parameters" +msgstr "调优参数" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:24 +msgid "" +"`--SLO_limits_for_dynamic_batch` is the tuning parameter (integer type) " +"for the dynamic batch feature, larger values impose more constraints on " +"the latency limitation, leading to higher effective throughput. The " +"parameter can be selected according to the specific models or service " +"requirements." +msgstr "" +"`--SLO_limits_for_dynamic_batch` 是动态批处理功能的调优参数(整数类型),较大的值会对延迟限制施加更多约束,从而带来更高的有效吞吐量。可以根据具体模型或服务需求选择该参数。" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:32 +msgid "Supported Models" +msgstr "支持的模型" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:34 +msgid "" +"So far, dynamic batch performs better on several dense models including " +"Qwen and Llama (from 8B to 32B) with `tensor_parallel_size=8`. For " +"different models, a proper `SLO_limits_for_dynamic_batch` parameter is " +"needed. The empirical value of this parameter is generally `35, 50, or " +"75`. Therefore, some additional tests are needed to select the best " +"parameter." +msgstr "" +"目前,动态批处理在几个密集模型上表现更好,包括Qwen和Llama(从8B到32B),且 `tensor_parallel_size=8`。对于不同的模型,需要一个合适的 `SLO_limits_for_dynamic_batch` 参数。该参数的经验值通常是 `35、50或75`。因此,需要进行一些额外的测试来选择最佳参数。" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:36 +msgid "Usage" +msgstr "使用方法" + +#: ../../source/user_guide/feature_guide/dynamic_batch.md:38 +msgid "" +"Dynamic batch is used in the online inference. A fully executable example" +" is as follows:" +msgstr "动态批处理用于在线推理。一个完全可执行的示例如下:" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po new file mode 100644 index 00000000..ff96621a --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po @@ -0,0 +1,237 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:1 +msgid "Disaggregated-encoder" +msgstr "解耦编码器" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:3 +msgid "Why disaggregated-encoder?" +msgstr "为何需要解耦编码器?" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:5 +msgid "" +"A **disaggregated encoder** runs the vision-encoder stage of a multimodal" +" LLM in a process that is separate from the pre-fill / decoder stage. " +"Deploying these two stages in independent vLLM instances brings three " +"practical benefits:" +msgstr "" +"**解耦编码器** 将多模态大语言模型的视觉编码器阶段运行在与预填充/解码器阶段分离的进程中。将这两个阶段部署在独立的 vLLM 实例中,带来三个实际好处:" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:7 +msgid "**Independent, fine-grained scaling**" +msgstr "**独立、细粒度的扩展**" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:9 +msgid "" +"Vision encoders are lightweight, while language models are orders of " +"magnitude larger." +msgstr "视觉编码器是轻量级的,而语言模型则要大几个数量级。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:10 +msgid "" +"The language model can be parallelised without affecting the encoder " +"fleet." +msgstr "语言模型可以并行化,而不影响编码器集群。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:11 +msgid "Encoder nodes can be added or removed independently." +msgstr "编码器节点可以独立地添加或移除。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:13 +msgid "**Lower time-to-first-token (TTFT)**" +msgstr "**降低首令牌生成时间 (TTFT)**" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:15 +msgid "Language-only requests bypass the vision encoder entirely." +msgstr "纯文本请求完全绕过视觉编码器。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:16 +msgid "" +"Encoder output is injected only at required attention layers, shortening " +"the pre-fill critical path." +msgstr "编码器输出仅在所需的注意力层注入,缩短了预填充的关键路径。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:18 +msgid "**Cross-process reuse and caching of encoder outputs**" +msgstr "**编码器输出的跨进程复用与缓存**" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:20 +msgid "In-process encoders confine reuse to a single worker." +msgstr "进程内编码器将复用限制在单个工作进程内。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:21 +msgid "" +"A remote, shared cache lets any worker retrieve existing embeddings, " +"eliminating redundant computation." +msgstr "远程共享缓存允许任何工作进程检索现有的嵌入向量,从而消除冗余计算。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:23 +msgid "" +"Design doc: " +msgstr "" +"设计文档:" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:27 +msgid "Usage" +msgstr "使用方法" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:29 +msgid "" +"The current reference pathway is **ExampleConnector**. 
The ready-to-run " +"scripts below show the workflow:" +msgstr "当前的参考实现路径是 **ExampleConnector**。以下开箱即用的脚本展示了工作流程:" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:32 +msgid "" +"1 Encoder instance + 1 PD instance: " +"`examples/online_serving/disaggregated_encoder/disagg_1e1pd/`" +msgstr "" +"1 个编码器实例 + 1 个 PD 实例:" +"`examples/online_serving/disaggregated_encoder/disagg_1e1pd/`" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:35 +msgid "" +"1 Encoder instance + 1 Prefill instance + 1 Decode instance: " +"`examples/online_serving/disaggregated_encoder/disagg_1e1p1d/`" +msgstr "" +"1 个编码器实例 + 1 个预填充实例 + 1 个解码实例:" +"`examples/online_serving/disaggregated_encoder/disagg_1e1p1d/`" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:40 +msgid "Development" +msgstr "开发说明" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:42 +msgid "![alt text](<./images/epd_disaggregation.jpg>)" +msgstr "![替代文本](<./images/epd_disaggregation.jpg>)" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:42 +msgid "alt text" +msgstr "替代文本" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:44 +msgid "Disaggregated encoding is implemented by running two parts:" +msgstr "解耦编码通过运行两个部分来实现:" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:46 +msgid "**Encoder instance** – a vLLM instance to perform vision encoding." +msgstr "**编码器实例** – 一个执行视觉编码的 vLLM 实例。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:47 +msgid "**Prefill/Decode (PD) instance(s)** – runs language pre-fill and decode." +msgstr "**预填充/解码 (PD) 实例** – 运行语言预填充和解码。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:48 +msgid "" +"PD can be in either a single normal instance with (E + PD) or in " +"disaggregated instances with (E + P + D)" +msgstr "PD 可以是一个包含 (E + PD) 的单一常规实例,也可以是解耦的 (E + P + D) 实例" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:50 +msgid "" +"A connector transfers encoder-cache (EC) embeddings from the encoder " +"instance to the PD instance. All related code is under " +"`vllm/distributed/ec_transfer`." +msgstr "" +"一个连接器将编码器缓存 (EC) 嵌入向量从编码器实例传输到 PD 实例。所有相关代码位于 `vllm/distributed/ec_transfer` 目录下。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:53 +msgid "Key abstractions" +msgstr "关键抽象" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:55 +msgid "" +"**ECConnector** – interface for retrieving EC caches produced by the " +"encoder." +msgstr "**ECConnector** – 用于检索编码器生成的 EC 缓存的接口。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:56 +msgid "*Scheduler role* – checks cache existence and schedules loads." +msgstr "*调度器角色* – 检查缓存是否存在并调度加载。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:57 +msgid "*Worker role* – loads the embeddings into memory." 
+msgstr "*工作进程角色* – 将嵌入向量加载到内存中。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:59 +msgid "**EPD Load Balance Proxy** -" +msgstr "**EPD 负载均衡代理** -" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:60 +msgid "" +"*Multi-Path Scheduling Strategy* - dynamically diverts the multimodal " +"request or text requests to the corresponding inference path" +msgstr "*多路径调度策略* - 动态地将多模态请求或文本请求分流到相应的推理路径" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:61 +msgid "" +"*Instance-Level Dynamic Load Balancing* - dispatches multimodal requests" +" based on a least-loaded strategy, using a priority queue to balance the " +"active token workload across instances." +msgstr "*实例级动态负载均衡* - 基于最小负载策略分发多模态请求,使用优先级队列来平衡各实例间的活跃令牌工作负载。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:63 +msgid "" +"We create the example setup with the **MooncakeLayerwiseConnector** from " +"`vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py`" +" and refer to the " +"`examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py`" +" to facilitate the kv transfer between P and D. For step-by-step " +"deployment and configuration of Mooncake, refer to the following guide:" +" " +"[https://docs.vllm.ai/projects/ascend/en/latest/tutorials/pd_disaggregation_mooncake_multi_node.html](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" +msgstr "" +"我们使用来自 `vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py` 的 **MooncakeLayerwiseConnector** 创建示例设置,并参考 " +"`examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py` 来促进 P 和 D 之间的 KV 传输。关于 Mooncake 的逐步部署和配置,请参考以下指南:" +" " +"[https://docs.vllm.ai/projects/ascend/en/latest/tutorials/pd_disaggregation_mooncake_multi_node.html](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:66 +msgid "" +"For the PD disaggregation part, when using MooncakeLayerwiseConnector: " +"The request first enters the Decoder instance,the Decoder triggers a " +"remote prefill task in reverse via the Metaserver. The Prefill node then " +"executes inference and pushes KV Cache layer-wise to the Decoder, " +"overlapping computation with transmission. Once the transfer is complete," +" the Decoder seamlessly continues with the subsequent token generation. " +"`docs/source/developer_guide/Design_Documents/disaggregated_prefill.md` " +"shows the brief idea about the disaggregated prefill." 
+msgstr "" +"对于 PD 解耦部分,当使用 MooncakeLayerwiseConnector 时:请求首先进入解码器实例,解码器通过元服务器反向触发一个远程预填充任务。然后预填充节点执行推理,并将 KV 缓存逐层推送到解码器,实现计算与传输的重叠。一旦传输完成,解码器无缝地继续后续的令牌生成。`docs/source/developer_guide/Design_Documents/disaggregated_prefill.md` 展示了关于解耦预填充的简要思路。" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:69 +msgid "Limitations" +msgstr "限制" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:71 +msgid "" +"Disable `--mm-processor-cache-gb 0` if you want to use cross-process " +"caching" +msgstr "如果要使用跨进程缓存,请禁用 `--mm-processor-cache-gb 0`" + +#: ../../source/user_guide/feature_guide/epd_disaggregation.md:73 +msgid "" +"For the PD disaggregation part, refer to the limitations of PD " +"decomposition" +msgstr "对于 PD 解耦部分,请参考 PD 分解的限制" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po new file mode 100644 index 00000000..acdc8985 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po @@ -0,0 +1,247 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:1 +msgid "Expert Load Balance (EPLB)" +msgstr "专家负载均衡 (EPLB)" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:5 +msgid "" +"Expert balancing for MoE models in LLM serving is essential for optimal " +"performance. Dynamically changing experts during inference can negatively" +" impact TTFT (Time To First Token) and TPOT (Time Per Output Token) due " +"to stop-the-world operations. SwiftBalancer enables asynchronous expert " +"load balancing with zero-overhead expert movement, ensuring seamless " +"service continuity." +msgstr "" +"在LLM服务中,MoE模型的专家均衡对于实现最佳性能至关重要。推理过程中动态改变专家会因全局暂停操作而对TTFT(首词元时间)和TPOT(每输出词元时间)产生负面影响。SwiftBalancer支持异步专家负载均衡,实现零开销的专家迁移,确保服务无缝连续。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:7 +msgid "EPLB Effects" +msgstr "EPLB效果" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:9 +msgid "" +"Reduced Latency: Dynamically balances expert loads to minimize TTFT and " +"TPOT by distributing workloads evenly across experts." +msgstr "降低延迟:动态均衡专家负载,通过在各专家间均匀分配工作负载,最小化TTFT和TPOT。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:10 +msgid "" +"Enhanced Throughput: Optimizes GPU utilization, increasing token " +"generation speed under high-concurrency scenarios." +msgstr "提升吞吐量:优化GPU利用率,在高并发场景下提高词元生成速度。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:11 +msgid "" +"Zero-Overhead Movement: Expert redistribution occurs asynchronously " +"without interrupting ongoing inference requests." 
+msgstr "零开销迁移:专家重分布异步进行,不会中断正在进行的推理请求。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:12 +msgid "" +"Adaptive Scaling: Automatically adjusts to workload fluctuations while " +"maintaining stable performance." +msgstr "自适应扩展:自动适应工作负载波动,同时保持性能稳定。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:13 +msgid "" +"Fault Tolerance: Redundant expert placement ensures system resilience " +"during hardware failures." +msgstr "容错性:冗余的专家放置确保在硬件故障期间系统的韧性。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:15 +msgid "Support Scenarios" +msgstr "支持场景" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:17 +msgid "Models" +msgstr "模型" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:19 +msgid "DeepSeekV3/V3.1/R1, Qwen3-MoE" +msgstr "DeepSeekV3/V3.1/R1, Qwen3-MoE" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:21 +msgid "MOE QuantType" +msgstr "MOE量化类型" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:23 +msgid "W8A8-Dynamic" +msgstr "W8A8-Dynamic" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:25 +msgid "How to Use EPLB" +msgstr "如何使用EPLB" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:27 +msgid "Dynamic EPLB" +msgstr "动态EPLB" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:29 +msgid "" +"We need to add environment variable `export DYNAMIC_EPLB=\"true\"` to " +"enable vLLM EPLB. Enable dynamic balancing with auto-tuned parameters. " +"Adjust expert_heat_collection_interval and algorithm_execution_interval " +"based on workload patterns." +msgstr "" +"我们需要添加环境变量 `export DYNAMIC_EPLB=\"true\"` 来启用vLLM EPLB。启用具有自动调优参数的动态均衡。根据工作负载模式调整 expert_heat_collection_interval 和 algorithm_execution_interval。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:42 +msgid "Static EPLB" +msgstr "静态EPLB" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:44 +msgid "Initial Setup (Record Expert Map)" +msgstr "初始设置(记录专家映射)" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:46 +msgid "" +"We need to add environment variable `export EXPERT_MAP_RECORD=\"true\"` " +"to record expert map. Generate the initial expert distribution map using " +"expert_map_record_path. This creates a baseline configuration for future " +"deployments." +msgstr "" +"我们需要添加环境变量 `export EXPERT_MAP_RECORD=\"true\"` 来记录专家映射。使用 expert_map_record_path 生成初始专家分布映射。这将为未来的部署创建一个基线配置。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:60 +msgid "Subsequent Deployments (Use Recorded Map)" +msgstr "后续部署(使用记录的映射)" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:62 +msgid "" +"Load the pre-recorded expert map for consistent performance. This avoids " +"recalculating distributions at runtime." +msgstr "加载预记录的专家映射以获得一致的性能。这避免了在运行时重新计算分布。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:73 +msgid "Critical Considerations" +msgstr "关键注意事项" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:75 +msgid "Parameter Tuning:" +msgstr "参数调优:" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:76 +msgid "" +"expert_heat_collection_interval: Higher values (e.g., 400+) for stable " +"workloads; lower values (e.g., 100-200) for fluctuating traffic." 
+msgstr "expert_heat_collection_interval:对于稳定的工作负载使用较高值(例如400+);对于波动流量使用较低值(例如100-200)。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:77 +msgid "" +"algorithm_execution_interval: Should be ≥ 30 to avoid premature balancing" +" during startup." +msgstr "algorithm_execution_interval:应≥30,以避免在启动期间过早进行均衡。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:78 +msgid "" +"num_redundant_experts: Must match tensor-parallel size (e.g., 16 for 16 " +"GPUs) to ensure sufficient redundancy." +msgstr "num_redundant_experts:必须与张量并行大小匹配(例如,16个GPU对应16),以确保足够的冗余。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:80 +msgid "Hardware Requirements:" +msgstr "硬件要求:" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:81 +msgid "" +"Ensure that all GPUs have identical memory capacity and compute " +"capabilities." +msgstr "确保所有GPU具有相同的内存容量和计算能力。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:82 +msgid "" +"Network bandwidth must support expert redistribution traffic (≥ 10 Gbps " +"recommended)." +msgstr "网络带宽必须支持专家重分布流量(建议≥10 Gbps)。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:84 +msgid "Model Compatibility:" +msgstr "模型兼容性:" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:85 +msgid "" +"Only MoE models with explicit expert parallelism support (e.g., Qwen3 MoE" +" models) are compatible." +msgstr "仅支持显式专家并行的MoE模型(例如Qwen3 MoE模型)是兼容的。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:86 +msgid "" +"Verify model architecture supports dynamic expert routing through " +"`--enable-expert-parallel`." +msgstr "验证模型架构是否通过 `--enable-expert-parallel` 支持动态专家路由。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:88 +msgid "Monitoring & Validation:" +msgstr "监控与验证:" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:89 +msgid "" +"Track metrics: expert_load_balance_ratio, ttft_p99, tpot_avg, and " +"gpu_utilization." +msgstr "跟踪指标:expert_load_balance_ratio, ttft_p99, tpot_avg 和 gpu_utilization。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:90 +msgid "Use vLLM monitor to detect imbalances during runtime." +msgstr "使用vLLM监控器在运行时检测不均衡。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:91 +msgid "" +"Always verify expert map JSON structure before loading (validate with jq " +"or similar tools)." +msgstr "在加载前始终验证专家映射的JSON结构(使用jq或类似工具验证)。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:93 +msgid "Startup Behavior:" +msgstr "启动行为:" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:94 +msgid "" +"Initial requests may experience higher latency during the first balancing" +" cycle (typically 1-2 minutes)." +msgstr "初始请求在第一个均衡周期(通常1-2分钟)内可能会经历较高的延迟。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:95 +msgid "Avoid sudden traffic spikes during the warm-up phase." +msgstr "避免在预热阶段出现突发的流量高峰。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:97 +msgid "Common Pitfalls:" +msgstr "常见陷阱:" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:98 +msgid "" +"Incorrect tensor-parallel-size vs. actual GPU count → causes resource " +"underutilization." +msgstr "张量并行大小与实际GPU数量不匹配 → 导致资源利用不足。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:99 +msgid "Using expert_map_path without generating the map first → runtime errors." 
+msgstr "未先生成映射就使用 expert_map_path → 运行时错误。" + +#: ../../source/user_guide/feature_guide/eplb_swift_balancer.md:100 +msgid "Setting num_redundant_experts > available GPUs → system failure." +msgstr "设置 num_redundant_experts > 可用GPU数量 → 系统故障。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po new file mode 100644 index 00000000..77370fa7 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po @@ -0,0 +1,164 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/external_dp.md:1 +msgid "External DP" +msgstr "外部数据并行" + +#: ../../source/user_guide/feature_guide/external_dp.md:3 +msgid "" +"For larger-scale deployments especially, it can make sense to handle the " +"orchestration and load balancing of data parallel ranks externally." +msgstr "特别是在大规模部署场景下,在外部处理数据并行等级的编排与负载均衡是有意义的。" + +#: ../../source/user_guide/feature_guide/external_dp.md:5 +msgid "" +"In this case, it's more convenient to treat each DP rank like a separate " +"vLLM deployment, with its own endpoint, and have an external router " +"balance HTTP requests between them, making use of appropriate real-time " +"telemetry from each server for routing decisions." +msgstr "在这种情况下,将每个数据并行等级视为一个独立的 vLLM 部署(拥有自己的端点),并使用一个外部路由器在它们之间平衡 HTTP 请求,同时利用来自每个服务器的适当实时遥测数据来做出路由决策,会更加方便。" + +#: ../../source/user_guide/feature_guide/external_dp.md:7 +msgid "Getting Start" +msgstr "开始使用" + +#: ../../source/user_guide/feature_guide/external_dp.md:9 +msgid "" +"The functionality of [external " +"DP](https://docs.vllm.ai/en/latest/serving/data_parallel_deployment/?h=external" +"#external-load-balancing) is already natively supported by vLLM. In vllm-" +"ascend we provide two enhanced functionalities:" +msgstr "[外部数据并行](https://docs.vllm.ai/en/latest/serving/data_parallel_deployment/?h=external#external-load-balancing) 功能已由 vLLM 原生支持。在 vllm-ascend 中,我们提供了两项增强功能:" + +#: ../../source/user_guide/feature_guide/external_dp.md:11 +msgid "" +"A launch script that helps to launch multiple vLLM instances in one " +"command." +msgstr "一个启动脚本,用于通过一条命令启动多个 vLLM 实例。" + +#: ../../source/user_guide/feature_guide/external_dp.md:12 +msgid "A request-length-aware load-balance proxy for external DP." +msgstr "一个支持外部数据并行、可感知请求长度的负载均衡代理。" + +#: ../../source/user_guide/feature_guide/external_dp.md:14 +msgid "This tutorial will introduce the usage of them." 
+msgstr "本教程将介绍它们的使用方法。" + +#: ../../source/user_guide/feature_guide/external_dp.md:16 +msgid "Prerequisites" +msgstr "先决条件" + +#: ../../source/user_guide/feature_guide/external_dp.md:18 +msgid "Python 3.10+" +msgstr "Python 3.10+" + +#: ../../source/user_guide/feature_guide/external_dp.md:19 +msgid "Install dependencies needed by load-balance proxy server:" +msgstr "安装负载均衡代理服务器所需的依赖项:" + +#: ../../source/user_guide/feature_guide/external_dp.md:25 +msgid "Starting External DP Servers" +msgstr "启动外部数据并行服务器" + +#: ../../source/user_guide/feature_guide/external_dp.md:27 +msgid "" +"First, you need to have at least two vLLM servers running in data " +"parallel. These can be mock servers or actual vLLM servers. Note that " +"this proxy also works with only one vLLM server running, but will fall " +"back to direct request forwarding which is meaningless." +msgstr "首先,您需要至少运行两个处于数据并行模式的 vLLM 服务器。这些可以是模拟服务器或实际的 vLLM 服务器。请注意,此代理在仅运行一个 vLLM 服务器时也能工作,但会退化为直接请求转发,这没有意义。" + +#: ../../source/user_guide/feature_guide/external_dp.md:29 +msgid "" +"You can start external vLLM DP servers one-by-one manually or using the " +"launch script in `examples/external_online_dp`. For scenarios of large DP" +" size across multiple nodes, we recommend using our launch script for " +"convenience." +msgstr "您可以手动逐个启动外部 vLLM 数据并行服务器,也可以使用 `examples/external_online_dp` 中的启动脚本。对于跨多个节点的大规模数据并行场景,我们建议使用我们的启动脚本以方便操作。" + +#: ../../source/user_guide/feature_guide/external_dp.md:31 +msgid "Manually Launch" +msgstr "手动启动" + +#: ../../source/user_guide/feature_guide/external_dp.md:39 +msgid "Use Launch Script" +msgstr "使用启动脚本" + +#: ../../source/user_guide/feature_guide/external_dp.md:41 +msgid "" +"Firstly, you need to modify the " +"`examples/external_online_dp/run_dp_template.sh` according to your vLLM " +"configuration. Then you can use " +"`examples/external_online_dp/launch_online_dp.py` to launch multiple vLLM" +" instances in one command on each node. It will internally call " +"`examples/external_online_dp/run_dp_template.sh` for each DP rank with " +"proper DP-related parameters." +msgstr "首先,您需要根据您的 vLLM 配置修改 `examples/external_online_dp/run_dp_template.sh`。然后,您可以使用 `examples/external_online_dp/launch_online_dp.py` 在每个节点上通过一条命令启动多个 vLLM 实例。它将在内部为每个数据并行等级调用 `examples/external_online_dp/run_dp_template.sh`,并传入适当的数据并行相关参数。" + +#: ../../source/user_guide/feature_guide/external_dp.md:43 +msgid "An example of running external DP in one single node:" +msgstr "在单个节点上运行外部数据并行的示例:" + +#: ../../source/user_guide/feature_guide/external_dp.md:51 +msgid "An example of running external DP in two nodes:" +msgstr "在两个节点上运行外部数据并行的示例:" + +#: ../../source/user_guide/feature_guide/external_dp.md:66 +msgid "Starting Load-balance Proxy Server" +msgstr "启动负载均衡代理服务器" + +#: ../../source/user_guide/feature_guide/external_dp.md:68 +msgid "" +"After all vLLM DP instances are launched, you can now launch the load-" +"balance proxy server, which serves as an entrypoint for coming requests " +"and load-balances them between vLLM DP instances." +msgstr "所有 vLLM 数据并行实例启动后,您现在可以启动负载均衡代理服务器。该服务器作为传入请求的入口点,并在各个 vLLM 数据并行实例之间进行负载均衡。" + +#: ../../source/user_guide/feature_guide/external_dp.md:70 +msgid "The proxy server has the following features:" +msgstr "该代理服务器具有以下特性:" + +#: ../../source/user_guide/feature_guide/external_dp.md:72 +msgid "Load balances requests to multiple vLLM servers based on request length." 
+msgstr "基于请求长度,将请求负载均衡到多个 vLLM 服务器。" + +#: ../../source/user_guide/feature_guide/external_dp.md:73 +msgid "" +"Supports OpenAI-compatible `/v1/completions` and `/v1/chat/completions` " +"endpoints." +msgstr "支持 OpenAI 兼容的 `/v1/completions` 和 `/v1/chat/completions` 端点。" + +#: ../../source/user_guide/feature_guide/external_dp.md:74 +msgid "Streams responses from backend servers to clients." +msgstr "将来自后端服务器的响应流式传输给客户端。" + +#: ../../source/user_guide/feature_guide/external_dp.md:76 +msgid "" +"To run the proxy server, you need to specify the host and port for each " +"vLLM DP Instance:" +msgstr "要运行代理服务器,您需要为每个 vLLM 数据并行实例指定主机和端口:" + +#: ../../source/user_guide/feature_guide/external_dp.md:91 +msgid "" +"After this, you can directly send requests to the proxy server and run DP" +" with external load balancing." +msgstr "此后,您可以直接向代理服务器发送请求,并运行具有外部负载均衡功能的数据并行。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po index b2336a4e..592e3cee 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po @@ -3,119 +3,120 @@ # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/feature_guide/graph_mode.md:1 +#: ../../source/user_guide/feature_guide/graph_mode.md:1 msgid "Graph Mode Guide" msgstr "图模式指南" -#: ../../user_guide/feature_guide/graph_mode.md:4 +#: ../../source/user_guide/feature_guide/graph_mode.md:4 msgid "" "This feature is currently experimental. In future versions, there may be " -"behavioral changes around configuration, coverage, performance improvement." +"behavioral changes around configuration, coverage, performance " +"improvement." msgstr "此功能目前为实验性功能。在未来的版本中,配置、覆盖率和性能改进等方面的行为可能会有变化。" -#: ../../user_guide/feature_guide/graph_mode.md:7 +#: ../../source/user_guide/feature_guide/graph_mode.md:8 +msgid "" +"In context parallel scenario (i.e. prefill_context_parallel_size * " +"decode_context_parallel_size > 1), \"cudagraph_mode\" is not sufficiently" +" supported to be set to \"FULL\" yet." +msgstr "在上下文并行场景下(即 prefill_context_parallel_size * decode_context_parallel_size > 1),目前尚不支持将 \"cudagraph_mode\" 充分设置为 \"FULL\"。" + +#: ../../source/user_guide/feature_guide/graph_mode.md:11 msgid "" "This guide provides instructions for using Ascend Graph Mode with vLLM " -"Ascend. Please note that graph mode is only available on V1 Engine. And only" -" Qwen, DeepSeek series models are well tested from 0.9.0rc1. We'll make it " -"stable and generalize in the next release." -msgstr "" -"本指南提供了在 vLLM Ascend 上使用 Ascend 图模式的操作说明。请注意,图模式仅在 V1 引擎上可用,并且从 0.9.0rc1 起,仅对" -" Qwen、DeepSeek 系列模型进行了充分测试。我们将在下一个版本中使其更加稳定和通用。" +"Ascend. Please note that graph mode is only available on V1 Engine. And " +"only Qwen, DeepSeek series models are well tested from 0.9.0rc1. We will " +"make it stable and generalized in the next release." 
+msgstr "本指南提供了在 vLLM Ascend 中使用昇腾图模式的操作说明。请注意,图模式仅在 V1 引擎上可用,并且从 0.9.0rc1 版本起,仅对 Qwen、DeepSeek 系列模型进行了充分测试。我们将在下一个版本中使其更加稳定和通用。" -#: ../../user_guide/feature_guide/graph_mode.md:9 +#: ../../source/user_guide/feature_guide/graph_mode.md:13 msgid "Getting Started" msgstr "快速入门" -#: ../../user_guide/feature_guide/graph_mode.md:11 +#: ../../source/user_guide/feature_guide/graph_mode.md:15 msgid "" -"From v0.9.1rc1 with V1 Engine, vLLM Ascend will run models in graph mode by " -"default to keep the same behavior with vLLM. If you hit any issues, please " -"feel free to open an issue on GitHub and fallback to eager mode temporarily " -"by set `enforce_eager=True` when initializing the model." -msgstr "" -"从 v0.9.1rc1 版本起,使用 V1 引擎时,vLLM Ascend 默认将在图模式下运行模型,以保持与 vLLM " -"同样的行为。如果遇到任何问题,欢迎在 GitHub 上提交 issue,并在初始化模型时通过设置 `enforce_eager=True` 临时切换回 " -"eager 模式。" +"From v0.9.1rc1 with V1 Engine, vLLM Ascend will run models in graph mode " +"by default to keep the same behavior with vLLM. If you hit any issues, " +"please feel free to open an issue on GitHub and fall back to the eager " +"mode temporarily by setting `enforce_eager=True` when initializing the " +"model." +msgstr "从 v0.9.1rc1 版本起,在使用 V1 引擎时,vLLM Ascend 默认将在图模式下运行模型,以保持与 vLLM 一致的行为。如果遇到任何问题,欢迎在 GitHub 上提交 issue,并可在初始化模型时通过设置 `enforce_eager=True` 临时切换回 eager 模式。" -#: ../../user_guide/feature_guide/graph_mode.md:13 -msgid "There are two kinds for graph mode supported by vLLM Ascend:" +#: ../../source/user_guide/feature_guide/graph_mode.md:17 +msgid "There are two kinds of graph mode supported by vLLM Ascend:" msgstr "vLLM Ascend 支持两种图模式:" -#: ../../user_guide/feature_guide/graph_mode.md:14 +#: ../../source/user_guide/feature_guide/graph_mode.md:19 msgid "" -"**ACLGraph**: This is the default graph mode supported by vLLM Ascend. In " -"v0.9.1rc1, only Qwen series models are well tested." -msgstr "" -"**ACLGraph**:这是 vLLM Ascend 支持的默认图模式。在 v0.9.1rc1 版本中,Qwen 和Deepseek系列模型得到了充分测试。" +"**ACLGraph**: This is the default graph mode supported by vLLM Ascend. In" +" v0.9.1rc1, Qwen and DeepSeek series models are well tested." +msgstr "**ACLGraph**:这是 vLLM Ascend 支持的默认图模式。在 v0.9.1rc1 版本中,Qwen 和 DeepSeek 系列模型经过了充分测试。" -#: ../../user_guide/feature_guide/graph_mode.md:15 +#: ../../source/user_guide/feature_guide/graph_mode.md:20 msgid "" -"**TorchAirGraph**: This is the GE graph mode. In v0.9.1rc1, only DeepSeek " -"series models are supported." -msgstr "**TorchAirGraph**:这是GE图模式。在v0.9.1rc1版本中,仅支持DeepSeek系列模型。" +"**XliteGraph**: This is the OpenEuler Xlite graph mode. In v0.11.0, only " +"Llama, Qwen dense series models, Qwen MoE series models, and Qwen3-VL are" +" supported." +msgstr "**XliteGraph**:这是 OpenEuler Xlite 图模式。在 v0.11.0 版本中,仅支持 Llama、Qwen 稠密系列模型、Qwen MoE 系列模型以及 Qwen3-VL。" -#: ../../user_guide/feature_guide/graph_mode.md:17 +#: ../../source/user_guide/feature_guide/graph_mode.md:22 msgid "Using ACLGraph" msgstr "使用 ACLGraph" -#: ../../user_guide/feature_guide/graph_mode.md:18 +#: ../../source/user_guide/feature_guide/graph_mode.md:24 msgid "" -"ACLGraph is enabled by default. Take Qwen series models as an example, just " -"set to use V1 Engine is enough." +"ACLGraph is enabled by default. Take Qwen series models as an example, " +"just set to use V1 Engine." 
msgstr "ACLGraph 默认启用。以 Qwen 系列模型为例,只需设置为使用 V1 引擎即可。" -#: ../../user_guide/feature_guide/graph_mode.md:20 -#: ../../user_guide/feature_guide/graph_mode.md:41 -#: ../../user_guide/feature_guide/graph_mode.md:64 -msgid "offline example:" +#: ../../source/user_guide/feature_guide/graph_mode.md:26 +#: ../../source/user_guide/feature_guide/graph_mode.md:51 +#: ../../source/user_guide/feature_guide/graph_mode.md:74 +msgid "Offline example:" msgstr "离线示例:" -#: ../../user_guide/feature_guide/graph_mode.md:31 -#: ../../user_guide/feature_guide/graph_mode.md:52 -#: ../../user_guide/feature_guide/graph_mode.md:74 -msgid "online example:" +#: ../../source/user_guide/feature_guide/graph_mode.md:37 +#: ../../source/user_guide/feature_guide/graph_mode.md:62 +#: ../../source/user_guide/feature_guide/graph_mode.md:84 +msgid "Online example:" msgstr "在线示例:" -#: ../../user_guide/feature_guide/graph_mode.md:37 -msgid "Using TorchAirGraph" -msgstr "使用 TorchAirGraph" +#: ../../source/user_guide/feature_guide/graph_mode.md:43 +msgid "Using XliteGraph" +msgstr "使用 XliteGraph" -#: ../../user_guide/feature_guide/graph_mode.md:39 +#: ../../source/user_guide/feature_guide/graph_mode.md:45 msgid "" -"If you want to run DeepSeek series models with graph mode, you should use " -"[TorchAirGraph](https://www.hiascend.com/document/detail/zh/Pytorch/700/modthirdparty/torchairuseguide/torchair_0002.html)." -" In this case, additional config is required." -msgstr "" -"如果你想通过图模式运行 DeepSeek 系列模型,你应该使用 " -"[TorchAirGraph](https://www.hiascend.com/document/detail/zh/Pytorch/700/modthirdparty/torchairuseguide/torchair_0002.html)。在这种情况下,需要额外的配置。" +"If you want to run Llama, Qwen dense series models, Qwen MoE series " +"models, or Qwen3-VL with Xlite graph mode, please install xlite, and set " +"xlite_graph_config." +msgstr "如果你想使用 Xlite 图模式运行 Llama、Qwen 稠密系列模型、Qwen MoE 系列模型或 Qwen3-VL,请安装 xlite 并设置 xlite_graph_config。" -#: ../../user_guide/feature_guide/graph_mode.md:58 +#: ../../source/user_guide/feature_guide/graph_mode.md:68 msgid "" -"You can find more detail about additional config " -"[here](../configuration/additional_config.md)." -msgstr "你可以在[这里](../configuration/additional_config.md)找到关于附加配置的更多详细信息。" +"You can find more details about " +"[Xlite](https://atomgit.com/openeuler/GVirt/blob/master/xlite/README.md)" +msgstr "你可以在 [Xlite](https://atomgit.com/openeuler/GVirt/blob/master/xlite/README.md) 找到更多详细信息。" -#: ../../user_guide/feature_guide/graph_mode.md:60 -msgid "Fallback to Eager Mode" +#: ../../source/user_guide/feature_guide/graph_mode.md:70 +msgid "Fallback to the Eager Mode" msgstr "回退到 Eager 模式" -#: ../../user_guide/feature_guide/graph_mode.md:62 +#: ../../source/user_guide/feature_guide/graph_mode.md:72 msgid "" -"If both `ACLGraph` and `TorchAirGraph` fail to run, you should fallback to " -"eager mode." -msgstr "如果 `ACLGraph` 和 `TorchAirGraph` 都无法运行,你应该退回到 eager 模式。" +"If `ACLGraph` and `XliteGraph` all fail to run, you should fall back to " +"the eager mode." +msgstr "如果 `ACLGraph` 和 `XliteGraph` 都无法运行,你应该退回到 eager 模式。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po new file mode 100644 index 00000000..070a4504 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po @@ -0,0 +1,644 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. 
+# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/kv_pool.md:1 +msgid "Ascend Store Deployment Guide" +msgstr "Ascend Store 部署指南" + +#: ../../source/user_guide/feature_guide/kv_pool.md:3 +msgid "Environmental Dependencies" +msgstr "环境依赖" + +#: ../../source/user_guide/feature_guide/kv_pool.md:5 +#: ../../source/user_guide/feature_guide/kv_pool.md:35 +msgid "Software:" +msgstr "软件:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:6 +msgid "CANN >= 8.5.0" +msgstr "CANN >= 8.5.0" + +#: ../../source/user_guide/feature_guide/kv_pool.md:7 +msgid "vLLM:main branch" +msgstr "vLLM:main 分支" + +#: ../../source/user_guide/feature_guide/kv_pool.md:8 +msgid "vLLM-Ascend:main branch" +msgstr "vLLM-Ascend:main 分支" + +#: ../../source/user_guide/feature_guide/kv_pool.md:9 +msgid "mooncake:>= 0.3.9" +msgstr "mooncake:>= 0.3.9" + +#: ../../source/user_guide/feature_guide/kv_pool.md:11 +msgid "KV Pool Parameter Description" +msgstr "KV Pool 参数说明" + +#: ../../source/user_guide/feature_guide/kv_pool.md:13 +msgid "" +"`kv_connector_extra_config`: Additional Configurable Parameters for " +"Pooling" +msgstr "`kv_connector_extra_config`: 池化的额外可配置参数" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Parameter" +msgstr "参数" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Description" +msgstr "描述" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`lookup_rpc_port`" +msgstr "`lookup_rpc_port`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "" +"Port for RPC Communication Between Pooling Scheduler Process and Worker " +"Process: Each Instance Requires a Unique Port Configuration." +msgstr "池化调度进程与工作进程间 RPC 通信端口:每个实例需要配置唯一端口。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`load_async`" +msgstr "`load_async`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Whether to Enable Asynchronous Loading. The default value is false." +msgstr "是否启用异步加载。默认值为 false。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`backend`" +msgstr "`backend`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Set the storage backend for kvpool, with the default being mooncake." +msgstr "设置 kvpool 的存储后端,默认为 mooncake。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`consumer_is_to_put`" +msgstr "`consumer_is_to_put`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Whether Decode node put KV Cache into KV Pool. The default value is false." +msgstr "Decode 节点是否将 KV Cache 放入 KV Pool。默认值为 false。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`consumer_is_to_load`" +msgstr "`consumer_is_to_load`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "" +"Whether Decode node load KV cache from KV Pool. The default value is " +"false." +msgstr "Decode 节点是否从 KV Pool 加载 KV cache。默认值为 false。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`prefill_pp_size`" +msgstr "`prefill_pp_size`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Prefill PP size, needs to be set when Prefill node enables PP." 
+msgstr "Prefill PP 大小,当 Prefill 节点启用 PP 时需要设置。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`prefill_pp_layer_partition`" +msgstr "`prefill_pp_layer_partition`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Prefill PP layer partition, needs to be set when Prefill node enables PP." +msgstr "Prefill PP 层划分,当 Prefill 节点启用 PP 时需要设置。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:25 +msgid "Environment Variable Configuration" +msgstr "环境变量配置" + +#: ../../source/user_guide/feature_guide/kv_pool.md:27 +msgid "" +"To guarantee uniform hash generation, it is required to synchronize the " +"PYTHONHASHSEED environment variable across all nodes upon enabling KV " +"Pool." +msgstr "为保证哈希生成的一致性,启用 KV Pool 时,需要在所有节点上同步 PYTHONHASHSEED 环境变量。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:33 +msgid "Example of using Mooncake as a KV Pool backend" +msgstr "使用 Mooncake 作为 KV Pool 后端的示例" + +#: ../../source/user_guide/feature_guide/kv_pool.md:35 +msgid "Software:" +msgstr "软件:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:36 +msgid "Check NPU HCCN Configuration:" +msgstr "检查 NPU HCCN 配置:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:38 +msgid "" +"Ensure that the hccn.conf file exists in the environment. If using " +"Docker, mount it into the container." +msgstr "确保环境中存在 hccn.conf 文件。如果使用 Docker,请将其挂载到容器中。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:44 +msgid "Install Mooncake" +msgstr "安装 Mooncake" + +#: ../../source/user_guide/feature_guide/kv_pool.md:46 +msgid "" +"Mooncake is the serving platform for Kimi, a leading LLM service provided" +" by Moonshot AI. Installation and Compilation Guide: " +". First, we need to obtain the Mooncake project. Refer to the " +"following command:" +msgstr "" +"Mooncake 是 Moonshot AI 提供的领先 LLM 服务 Kimi 的推理平台。 安装与编译指南:" +"。 首先,我们需要获取 Mooncake 项目。参考以下命令:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:54 +msgid "(Optional) Replace go install url if the network is poor" +msgstr "(可选)如果网络状况不佳,替换 go install 的 URL" + +#: ../../source/user_guide/feature_guide/kv_pool.md:61 +msgid "Install mpi" +msgstr "安装 mpi" + +#: ../../source/user_guide/feature_guide/kv_pool.md:67 +msgid "Install the relevant dependencies. The installation of Go is not required." 
+msgstr "安装相关依赖。无需安装 Go。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:73 +msgid "Compile and install" +msgstr "编译并安装" + +#: ../../source/user_guide/feature_guide/kv_pool.md:83 +msgid "Set environment variables" +msgstr "设置环境变量" + +#: ../../source/user_guide/feature_guide/kv_pool.md:85 +msgid "**Note:**" +msgstr "**注意:**" + +#: ../../source/user_guide/feature_guide/kv_pool.md:87 +msgid "Adjust the Python path according to your specific Python installation" +msgstr "根据您具体的 Python 安装调整 Python 路径" + +#: ../../source/user_guide/feature_guide/kv_pool.md:88 +msgid "" +"Ensure `/usr/local/lib` and `/usr/local/lib64` are in your " +"`LD_LIBRARY_PATH`" +msgstr "确保 `/usr/local/lib` 和 `/usr/local/lib64` 在您的 `LD_LIBRARY_PATH` 中" + +#: ../../source/user_guide/feature_guide/kv_pool.md:94 +msgid "Environment Variables Description" +msgstr "环境变量说明" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Hardware" +msgstr "硬件" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "HDK & CANN versions" +msgstr "HDK 与 CANN 版本" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Export Command" +msgstr "导出命令" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "800 I/T A3 series" +msgstr "800 I/T A3 系列" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "HDK >= 26.0.0
CANN >= 9.0.0" +msgstr "HDK >= 26.0.0
CANN >= 9.0.0" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`export ASCEND_ENABLE_USE_FABRIC_MEM=1`" +msgstr "`export ASCEND_ENABLE_USE_FABRIC_MEM=1`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "" +"**Recommended**. Enables unified memory address direct transmission " +"scheme." +msgstr "**推荐**。启用统一内存地址直传方案。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "25.5.0<=HDK<26.0.0" +msgstr "25.5.0<=HDK<26.0.0" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`export ASCEND_BUFFER_POOL=4:8`" +msgstr "`export ASCEND_BUFFER_POOL=4:8`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "" +"Configures the number and size of buffers on the NPU Device for " +"aggregation and KV transfer (e.g., `4:8` means 4 buffers of 8MB)." +msgstr "配置 NPU 设备上用于聚合和 KV 传输的缓冲区数量和大小(例如,`4:8` 表示 4 个 8MB 的缓冲区)。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "800 I/T A2 series" +msgstr "800 I/T A2 系列" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "N/A" +msgstr "不适用" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`export HCCL_INTRA_ROCE_ENABLE=1`" +msgstr "`export HCCL_INTRA_ROCE_ENABLE=1`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Required by direct transmission cheme on 800 I/T A2 series" +msgstr "800 I/T A2 系列直传方案所需" + +#: ../../source/user_guide/feature_guide/kv_pool.md:102 +msgid "FAQ for HIXL (ascend_direct) backend" +msgstr "HIXL (ascend_direct) 后端常见问题" + +#: ../../source/user_guide/feature_guide/kv_pool.md:104 +#, python-format +msgid "" +"For common troubleshooting and issue localization guidance for HIXL " +"(ascend_direct), see: " +"" +msgstr "" +"关于 HIXL (ascend_direct) 的常见故障排除和问题定位指南,请参阅:" +"" + +#: ../../source/user_guide/feature_guide/kv_pool.md:107 +msgid "Run Mooncake Master" +msgstr "运行 Mooncake Master" + +#: ../../source/user_guide/feature_guide/kv_pool.md:109 +msgid "1.Configure mooncake.json" +msgstr "1. 配置 mooncake.json" + +#: ../../source/user_guide/feature_guide/kv_pool.md:111 +msgid "" +"The environment variable **MOONCAKE_CONFIG_PATH** is configured to the " +"full path where mooncake.json is located." +msgstr "环境变量 **MOONCAKE_CONFIG_PATH** 配置为 mooncake.json 所在位置的完整路径。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:123 +msgid "" +"**metadata_server**: Configured as **P2PHANDSHAKE**. **protocol:** Must" +" be set to 'Ascend' on the NPU. **device_name**: \"\" " +"**master_server_address**: Configured with the IP and port of the master " +"service. **global_segment_size**: Registered memory size per card to " +"the KV Pool. **Needs to be aligned to 1GB.**" +msgstr "" +"**metadata_server**: 配置为 **P2PHANDSHAKE**。 **protocol:** 在 NPU 上必须设置为 'Ascend'。" +"**device_name**: \"\" **master_server_address**: 配置 master 服务的 IP 和端口。 " +"**global_segment_size**: 每张卡注册到 KV Pool 的内存大小。**需要对齐到 1GB。**" + +#: ../../source/user_guide/feature_guide/kv_pool.md:129 +msgid "2.Start mooncake_master" +msgstr "2. 启动 mooncake_master" + +#: ../../source/user_guide/feature_guide/kv_pool.md:131 +msgid "Under the mooncake folder:" +msgstr "在 mooncake 文件夹下:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:137 +msgid "" +"`eviction_high_watermark_ratio` determines the watermark where Mooncake " +"Store will perform eviction,and `eviction_ratio` determines the portion " +"of stored objects that would be evicted. 
`default_kv_lease_ttl` controls " +"the default lease TTL for KV objects (milliseconds); configure it via " +"`--default_kv_lease_ttl` and keep it larger than `ASCEND_CONNECT_TIMEOUT`" +" and `ASCEND_TRANSFER_TIMEOUT`." +msgstr "" +"`eviction_high_watermark_ratio` 决定了 Mooncake Store 执行淘汰的水位线,`eviction_ratio` 决定了将被淘汰的存储对象比例。" +"`default_kv_lease_ttl` 控制 KV 对象的默认租约 TTL(毫秒);通过 `--default_kv_lease_ttl` 配置,并保持其大于 " +"`ASCEND_CONNECT_TIMEOUT` 和 `ASCEND_TRANSFER_TIMEOUT`。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:140 +#: ../../source/user_guide/feature_guide/kv_pool.md:603 +msgid "PD Disaggregation Scenario" +msgstr "PD 解耦场景" + +#: ../../source/user_guide/feature_guide/kv_pool.md:142 +#: ../../source/user_guide/feature_guide/kv_pool.md:605 +msgid "1.Run `prefill` Node and `decode` Node" +msgstr "1. 运行 `prefill` 节点和 `decode` 节点" + +#: ../../source/user_guide/feature_guide/kv_pool.md:144 +msgid "" +"Using `MultiConnector` to simultaneously utilize both " +"`MooncakeConnectorV1` and `AscendStoreConnector`. `MooncakeConnectorV1` " +"performs kv_transfer, while `AscendStoreConnector` serves as the prefix-" +"cache node." +msgstr "" +"使用 `MultiConnector` 同时利用 `MooncakeConnectorV1` 和 `AscendStoreConnector`。" +"`MooncakeConnectorV1` 执行 kv_transfer,而 `AscendStoreConnector` 作为 prefix-cache 节点。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:146 +#: ../../source/user_guide/feature_guide/kv_pool.md:611 +#: ../../source/user_guide/feature_guide/kv_pool.md:771 +msgid "`prefill` Node:" +msgstr "`prefill` 节点:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:152 +msgid "The content of the multi_producer.sh script:" +msgstr "multi_producer.sh 脚本的内容:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:224 +#: ../../source/user_guide/feature_guide/kv_pool.md:690 +#: ../../source/user_guide/feature_guide/kv_pool.md:841 +msgid "`decode` Node:" +msgstr "`decode` 节点:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:230 +msgid "The content of multi_consumer.sh:" +msgstr "multi_consumer.sh 的内容:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:292 +msgid "" +"Currently, the key-value pool in PD Disaggregate only stores the kv cache" +" generated by the Prefill node by default. In models using MLA, it is now" +" supported that the Decode node stores the kv cache for use by the " +"Prefill node, enabled by adding `consumer_is_to_put: true` to the " +"AscendStoreConnector. If the Prefill node enables PP, `prefill_pp_size` " +"or `prefill_pp_layer_partition` also needs to be set. Example as follows:" +msgstr "" +"目前,PD 解耦中的键值池默认仅存储 Prefill 节点生成的 kv cache。在使用 MLA 的模型中,现已支持 Decode 节点存储 kv cache 供 " +"Prefill 节点使用,通过在 AscendStoreConnector 中添加 `consumer_is_to_put: true` 来启用。如果 Prefill " +"节点启用了 PP,则还需要设置 `prefill_pp_size` 或 `prefill_pp_layer_partition`。示例如下:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:308 +msgid "2、Start proxy_server" +msgstr "2、启动 proxy_server" + +#: ../../source/user_guide/feature_guide/kv_pool.md:319 +msgid "Change localhost to your actual IP address." +msgstr "将 localhost 更改为您的实际 IP 地址。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:321 +msgid "3.Run Inference" +msgstr "3. 运行推理" + +#: ../../source/user_guide/feature_guide/kv_pool.md:323 +msgid "" +"Configure the localhost, port, and model weight path in the command to " +"your own settings." 
+msgstr "将命令中的 localhost、端口和模型权重路径配置为您自己的设置。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:325 +#: ../../source/user_guide/feature_guide/kv_pool.md:388 +msgid "Short question:" +msgstr "短问题:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:331 +#: ../../source/user_guide/feature_guide/kv_pool.md:394 +msgid "Long question:" +msgstr "长问题:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:337 +msgid "PD-Mixed Inference" +msgstr "PD混合推理" + +#: ../../source/user_guide/feature_guide/kv_pool.md:339 +#: ../../source/user_guide/feature_guide/kv_pool.md:916 +msgid "1.Run Mixed Department Script" +msgstr "1. 运行混合部署脚本" + +#: ../../source/user_guide/feature_guide/kv_pool.md:345 +#: ../../source/user_guide/feature_guide/kv_pool.md:1056 +msgid "Content of pd_mix.sh:" +msgstr "pd_mix.sh 内容:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:384 +msgid "2.Run Inference" +msgstr "2. 运行推理" + +#: ../../source/user_guide/feature_guide/kv_pool.md:386 +msgid "" +"Configure the localhost, port, and model weight path in the command to " +"your own settings. The requests sent will only go to the port where the " +"mixed deployment script is located, and there is no need to start a " +"separate proxy." +msgstr "将命令中的 localhost、端口和模型权重路径配置为您自己的设置。发送的请求只会到达混合部署脚本所在的端口,无需启动单独的代理。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:400 +msgid "" +"Note: For MooncakeStore with `ASCEND_BUFFER_POOL` enabled, it is " +"recommended to perform a warm-up phase before running actual performance " +"benchmarks." +msgstr "注意:对于启用了 `ASCEND_BUFFER_POOL` 的 MooncakeStore,建议在实际运行性能基准测试之前进行预热阶段。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:402 +msgid "" +"This is because HCCL one-sided communication connections are created " +"lazily after the instance is launched when Device-to-Device communication" +" is involved. Currently, full-mesh connections between all devices are " +"required. Establishing these connections introduces a one-time time " +"overhead and persistent device memory consumption (4 MB of device memory " +"per connection)." +msgstr "这是因为当涉及设备到设备通信时,HCCL 单边通信连接是在实例启动后延迟创建的。目前,需要在所有设备之间建立全连接。建立这些连接会引入一次性时间开销和持续的设备内存消耗(每个连接消耗 4 MB 设备内存)。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:404 +msgid "" +"**For warm-up, it is recommended to issue requests with an input sequence" +" length of 8K and an output sequence length of 1, with the total number " +"of requests being 2–3× the number of devices (cards/dies).**" +msgstr "**对于预热,建议发送输入序列长度为 8K、输出序列长度为 1 的请求,请求总数为设备(卡/芯片)数量的 2-3 倍。**" + +#: ../../source/user_guide/feature_guide/kv_pool.md:406 +msgid "Example of using Memcache as a KV Pool backend" +msgstr "使用 Memcache 作为 KV 池后端的示例" + +#: ../../source/user_guide/feature_guide/kv_pool.md:408 +msgid "Installing Memcache" +msgstr "安装 Memcache" + +#: ../../source/user_guide/feature_guide/kv_pool.md:410 +msgid "" +"**MemCache depends on MemFabric. 
Therefore, MemFabric must be " +"installed.Installing the memcache after the memfabric is installed.**" +msgstr "**MemCache 依赖于 MemFabric。因此,必须先安装 MemFabric。在 memfabric 安装完成后,再安装 memcache。**" + +#: ../../source/user_guide/feature_guide/kv_pool.md:412 +msgid "" +"**memfabric_hybrid**: " +"" +msgstr "**memfabric_hybrid**: " + +#: ../../source/user_guide/feature_guide/kv_pool.md:414 +msgid "" +"**memcache**: " +"" +msgstr "**memcache**: " + +#: ../../source/user_guide/feature_guide/kv_pool.md:416 +msgid "Configuring the memcache Config File" +msgstr "配置 memcache 配置文件" + +#: ../../source/user_guide/feature_guide/kv_pool.md:419 +msgid "" +"**Config file parameters " +"description**:" +msgstr "**配置文件参数说明**:" + +#: ../../source/user_guide/feature_guide/kv_pool.md:421 +msgid "" +"Set TLS certificate configurations. If TLS is disabled, you do not need " +"to upload a certificate. If TLS is enabled, you need to upload a " +"certificate." +msgstr "设置 TLS 证书配置。如果禁用 TLS,则无需上传证书。如果启用 TLS,则需要上传证书。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:434 +msgid "" +"You are advised to copy mmc-local.conf and mmc-meta.conf to your own path" +" and modify them, and set the MMC_META_CONFIG_PATH environment variable " +"to the path of your own mmc-meta.conf file." +msgstr "建议您将 mmc-local.conf 和 mmc-meta.conf 复制到您自己的路径并进行修改,并将 MMC_META_CONFIG_PATH 环境变量设置为您自己的 mmc-meta.conf 文件的路径。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:436 +msgid "**mmc-meta.conf:**" +msgstr "**mmc-meta.conf:**" + +#: ../../source/user_guide/feature_guide/kv_pool.md:485 +#: ../../source/user_guide/feature_guide/kv_pool.md:559 +msgid "**Key Focuses:**" +msgstr "**关键要点:**" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`ock.mmc.meta_service_url`" +msgstr "`ock.mmc.meta_service_url`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "" +"Configure the IP address and port number of the master node. The IP " +"address and port number of the P node and D node can be the same." +msgstr "配置主节点的 IP 地址和端口号。P 节点和 D 节点的 IP 地址和端口号可以相同。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`ock.mmc.meta_service.config_store_url`" +msgstr "`ock.mmc.meta_service.config_store_url`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`ock.mmc.meta.ha.enable`" +msgstr "`ock.mmc.meta.ha.enable`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "Set to `false` to disable TLS authentication modification." +msgstr "设置为 `false` 以禁用 TLS 认证修改。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`ock.mmc.config_store.tls.enable`" +msgstr "`ock.mmc.config_store.tls.enable`" + +#: ../../source/user_guide/feature_guide/kv_pool.md:494 +msgid "**mmc-local.conf:**" +msgstr "**mmc-local.conf:**" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`ock.mmc.local_service.config_store_url`" +msgstr "`ock.mmc.local_service.config_store_url`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`ock.mmc.local_service.world_size`" +msgstr "`ock.mmc.local_service.world_size`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "" +"Total count of local service, including services that will be added in " +"the future." 
+msgstr "本地服务的总数,包括未来将添加的服务。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`ock.mmc.local_service.protocol`" +msgstr "`ock.mmc.local_service.protocol`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "" +"`host_rdma` (default), `device_rdma` (supported for A2 and A3 when device" +" ROCE available, recommended for A2), `device_sdma` (supported for A3 " +"when HCCS available, recommended for A3). Currently does not support " +"heterogeneous protocol setting." +msgstr "`host_rdma` (默认), `device_rdma` (A2 和 A3 在设备 ROCE 可用时支持,推荐用于 A2), `device_sdma` (A3 在 HCCS 可用时支持,推荐用于 A3)。目前不支持异构协议设置。" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "`ock.mmc.local_service.dram.size`" +msgstr "`ock.mmc.local_service.dram.size`" + +#: ../../source/user_guide/feature_guide/kv_pool.md +msgid "" +"Sets the size of the memory occupied by the master. The configured value " +"is the size of the memory occupied by each card." +msgstr "设置主节点占用的内存大小。配置的值为每张卡占用的内存大小。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:571 +msgid "Memcache environment variables" +msgstr "Memcache 环境变量" + +#: ../../source/user_guide/feature_guide/kv_pool.md:580 +msgid "Run Memcache Master" +msgstr "运行 Memcache 主节点" + +#: ../../source/user_guide/feature_guide/kv_pool.md:582 +msgid "Starting the MetaService service." +msgstr "启动 MetaService 服务。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:593 +msgid "Method 2 for starting the MetaService service." +msgstr "启动 MetaService 服务的方法 2。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:607 +msgid "" +"Using `MultiConnector` to simultaneously utilize both " +"`MooncakeConnectorV1` and `AscendStoreConnector`. `MooncakeConnectorV1` " +"performs kv_transfer, while `AscendStoreConnector` enables KV Cache Pool" +msgstr "使用 `MultiConnector` 同时利用 `MooncakeConnectorV1` 和 `AscendStoreConnector`。`MooncakeConnectorV1` 执行 kv_transfer,而 `AscendStoreConnector` 启用 KV 缓存池" + +#: ../../source/user_guide/feature_guide/kv_pool.md:609 +#: ../../source/user_guide/feature_guide/kv_pool.md:918 +msgid "800I A2/800T A2 Series" +msgstr "800I A2/800T A2 系列" + +#: ../../source/user_guide/feature_guide/kv_pool.md:769 +#: ../../source/user_guide/feature_guide/kv_pool.md:1050 +msgid "800I A3/800T A3 Series" +msgstr "800I A3/800T A3 系列" + +#: ../../source/user_guide/feature_guide/kv_pool.md:910 +msgid "[2、Start proxy_server](#2start-proxy_server)" +msgstr "[2、启动 proxy_server](#2start-proxy_server)" + +#: ../../source/user_guide/feature_guide/kv_pool.md:912 +msgid "[3、run-inference](#3run-inference)" +msgstr "[3、运行推理](#3run-inference)" + +#: ../../source/user_guide/feature_guide/kv_pool.md:914 +msgid "PD-Mixed Scenario" +msgstr "PD混合场景" + +#: ../../source/user_guide/feature_guide/kv_pool.md:920 +msgid "The deepseek model needs to be run in a two-node cluster." +msgstr "deepseek 模型需要在双节点集群中运行。" + +#: ../../source/user_guide/feature_guide/kv_pool.md:922 +msgid "**Run_pd_mix_1.sh:**" +msgstr "**Run_pd_mix_1.sh:**" + +#: ../../source/user_guide/feature_guide/kv_pool.md:985 +msgid "**Run_pd_mix_2.sh:**" +msgstr "**Run_pd_mix_2.sh:**" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po new file mode 100644 index 00000000..e1bb9c05 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po @@ -0,0 +1,477 @@ +# SOME DESCRIPTIVE TITLE. 
+# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:1 +msgid "Distributed DP Server With Large-Scale Expert Parallelism" +msgstr "分布式数据并行服务器与大规模专家并行" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:3 +msgid "Getting Start" +msgstr "快速开始" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:5 +msgid "" +"vLLM-Ascend now supports prefill-decode (PD) disaggregation in the large-" +"scale **Expert Parallelism (EP)** scenario. To achieve better " +"performance, the distributed DP server is applied in vLLM-Ascend. In the " +"PD separation scenario, different optimization strategies can be " +"implemented based on the distinct characteristics of PD nodes, thereby " +"enabling more flexible model deployment. Taking the DeepSeek model as an " +"example, using 8 Atlas 800T A3 servers to deploy the model. Assume the IP" +" of the servers starts from 192.0.0.1 and ends by 192.0.0.8. Use the " +"first 4 servers as prefiller nodes and the last 4 servers as decoder " +"nodes. And the prefiller nodes are deployed as master nodes " +"independently, while the decoder nodes use the 192.0.0.5 node as the " +"master node." +msgstr "" +"vLLM-Ascend 现已支持在大规模**专家并行(EP)**场景下的预填充-解码(PD)解耦。为获得更好的性能,vLLM-Ascend 中应用了分布式数据并行服务器。在 PD 分离场景下,可以根据 PD 节点的不同特性实施不同的优化策略,从而实现更灵活的模型部署。以 DeepSeek 模型为例,使用 8 台 Atlas 800T A3 服务器部署模型。假设服务器 IP 从 192.0.0.1 开始到 192.0.0.8 结束。使用前 4 台服务器作为预填充节点,后 4 台服务器作为解码节点。并且预填充节点独立部署为主节点,而解码节点使用 192.0.0.5 节点作为主节点。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:8 +msgid "Verify Multi-Node Communication Environment" +msgstr "验证多节点通信环境" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:10 +msgid "Physical Layer Requirements" +msgstr "物理层要求" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:12 +msgid "" +"The physical machines must be located on the same WLAN, with network " +"connectivity." +msgstr "物理机必须位于同一无线局域网内,并具备网络连通性。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:13 +msgid "" +"All NPUs must be interconnected. For the Atlas A2 generation, intra-node " +"connectivity is via HCCS, and inter-node connectivity is via RDMA. For " +"the Atlas A3 generation, both intra-node and inter-node connectivity are " +"via HCCS." +msgstr "" +"所有 NPU 必须互连。对于 Atlas A2 代,节点内连接通过 HCCS,节点间连接通过 RDMA。对于 Atlas A3 代,节点内和节点间连接均通过 HCCS。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:15 +msgid "Verification Process" +msgstr "验证流程" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md +msgid "A3" +msgstr "A3" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:22 +#: ../../source/user_guide/feature_guide/large_scale_ep.md:64 +msgid "Single Node Verification:" +msgstr "单节点验证:" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:24 +#: ../../source/user_guide/feature_guide/large_scale_ep.md:66 +msgid "" +"Execute the following commands on each node in sequence. 
The results must" +" all be `success` and the status must be `UP`:" +msgstr "依次在每个节点上执行以下命令。结果必须全部为 `success` 且状态必须为 `UP`:" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:41 +#: ../../source/user_guide/feature_guide/large_scale_ep.md:83 +msgid "Get NPU IP Addresses" +msgstr "获取 NPU IP 地址" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:47 +msgid "Get superpodid and SDID" +msgstr "获取 superpodid 和 SDID" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:53 +#: ../../source/user_guide/feature_guide/large_scale_ep.md:89 +msgid "Cross-Node PING Test" +msgstr "跨节点 PING 测试" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md +msgid "A2" +msgstr "A2" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:98 +msgid "Large-Scale EP model deployment" +msgstr "大规模 EP 模型部署" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:100 +msgid "Generate script with configurations" +msgstr "生成配置脚本" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:102 +msgid "" +"In the PD separation scenario, we provide an optimized configuration. You" +" can use the following shell script for configuring the prefiller and " +"decoder nodes respectively." +msgstr "在 PD 分离场景下,我们提供了优化配置。您可以使用以下 shell 脚本分别配置预填充节点和解码节点。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md +msgid "Prefiller node" +msgstr "预填充节点" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md +msgid "Decoder node" +msgstr "解码节点" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:241 +msgid "Start Distributed DP Server for prefill-decode disaggregation" +msgstr "启动用于预填充-解码解耦的分布式数据并行服务器" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:243 +msgid "" +"Execute the following Python file on all nodes to use the distributed DP " +"server. (We recommend using this feature on the v0.9.1 official release)" +msgstr "在所有节点上执行以下 Python 文件以使用分布式数据并行服务器。(我们建议在 v0.9.1 正式版本中使用此功能)" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:317 +msgid "" +"Note that the prefiller nodes and the decoder nodes may have different " +"configurations. In this example, each prefiller node is deployed as a " +"master node independently, while the decoder nodes use the 192.0.0.5 node" +" as the master node. This leads to differences in 'dp_size_local' and " +"'dp_rank_start'" +msgstr "请注意,预填充节点和解码节点可能具有不同的配置。在此示例中,每个预填充节点独立部署为主节点,而解码节点使用 192.0.0.5 节点作为主节点。这导致了 'dp_size_local' 和 'dp_rank_start' 的差异。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:319 +msgid "Example proxy for Distributed DP Server" +msgstr "分布式数据并行服务器示例代理" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:321 +msgid "" +"In the PD separation scenario, we need a proxy to distribute requests. 
" +"Execute the following commands to enable the example proxy:" +msgstr "在 PD 分离场景下,我们需要一个代理来分发请求。执行以下命令以启用示例代理:" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Parameter" +msgstr "参数" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "meaning" +msgstr "含义" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--port" +msgstr "--port" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Proxy service Port" +msgstr "代理服务端口" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--host" +msgstr "--host" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Proxy service Host IP" +msgstr "代理服务主机 IP" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--prefiller-hosts" +msgstr "--prefiller-hosts" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Hosts of prefiller nodes" +msgstr "预填充节点主机列表" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--prefiller-hosts-num" +msgstr "--prefiller-hosts-num" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Number of repetitions for prefiller node hosts" +msgstr "预填充节点主机重复次数" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--prefiller-ports" +msgstr "--prefiller-ports" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Ports of prefiller nodes" +msgstr "预填充节点端口列表" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--prefiller-ports-inc" +msgstr "--prefiller-ports-inc" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Number of increments for prefiller node ports" +msgstr "预填充节点端口增量数" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--decoder-hosts" +msgstr "--decoder-hosts" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Hosts of decoder nodes" +msgstr "解码节点主机列表" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--decoder-hosts-num" +msgstr "--decoder-hosts-num" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Number of repetitions for decoder node hosts" +msgstr "解码节点主机重复次数" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--decoder-ports" +msgstr "--decoder-ports" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Ports of decoder nodes" +msgstr "解码节点端口列表" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "--decoder-ports-inc" +msgstr "--decoder-ports-inc" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "Number of increments for decoder node ports" +msgstr "解码节点端口增量数" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:364 +msgid "" +"You can get the proxy program in the repository's examples, " +"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/v0.9.1-dev/examples/disaggregate_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "您可以在仓库的示例中找到代理程序,[load_balance_proxy_server_example.py](https://github.com/vllm-project/vllm-ascend/blob/v0.9.1-dev/examples/disaggregate_prefill_v1/load_balance_proxy_server_example.py)" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:366 +msgid "Benchmark" +msgstr "基准测试" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:368 +msgid "" +"We recommend using aisbench tool to assess performance. 
" +"[aisbench](https://gitee.com/aisbench/benchmark). Execute the following " +"commands to install aisbench" +msgstr "我们推荐使用 aisbench 工具评估性能。[aisbench](https://gitee.com/aisbench/benchmark)。执行以下命令安装 aisbench" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:376 +msgid "" +"You need to cancel the http proxy before assessing performance, as " +"follows:" +msgstr "在评估性能前,您需要取消 http 代理,如下所示:" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:384 +msgid "" +"You can place your datasets in the directory: " +"`benchmark/ais_bench/datasets`" +msgstr "您可以将数据集放置在目录:`benchmark/ais_bench/datasets` 中" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:385 +msgid "" +"You can change the configuration in the directory " +":`benchmark/ais_bench/benchmark/configs/models/vllm_api` Take " +"`vllm_api_stream_chat.py` as an example:" +msgstr "您可以在目录:`benchmark/ais_bench/benchmark/configs/models/vllm_api` 中更改配置。以 `vllm_api_stream_chat.py` 为例:" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:411 +msgid "" +"Taking the gsm8k dataset as an example, execute the following commands to" +" assess performance." +msgstr "以 gsm8k 数据集为例,执行以下命令评估性能。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:417 +msgid "" +"For more details on commands and parameters for aisbench, refer to " +"[aisbench](https://gitee.com/aisbench/benchmark)" +msgstr "有关 aisbench 命令和参数的更多详细信息,请参考 [aisbench](https://gitee.com/aisbench/benchmark)" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:419 +msgid "Prefill & Decode Configuration Details" +msgstr "预填充与解码配置详情" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:421 +msgid "In the PD separation scenario, we provide an optimized configuration." +msgstr "在 PD 分离场景下,我们提供了优化配置。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:423 +msgid "**prefiller node**" +msgstr "**预填充节点**" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:425 +msgid "set HCCL_BUFFSIZE=256" +msgstr "设置 HCCL_BUFFSIZE=256" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:426 +msgid "add '--enforce-eager' command to 'vllm serve'" +msgstr "向 'vllm serve' 添加 '--enforce-eager' 命令" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:427 +#: ../../source/user_guide/feature_guide/large_scale_ep.md:449 +msgid "Take '--kv-transfer-config' as follows:" +msgstr "按如下方式设置 '--kv-transfer-config':" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:440 +#: ../../source/user_guide/feature_guide/large_scale_ep.md:462 +msgid "Take '--additional-config' as follows:" +msgstr "按如下方式设置 '--additional-config':" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:446 +msgid "**decoder node**" +msgstr "**解码节点**" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:448 +msgid "set HCCL_BUFFSIZE=1024" +msgstr "设置 HCCL_BUFFSIZE=1024" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:468 +msgid "Parameters Description" +msgstr "参数说明" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:470 +msgid "'--additional-config' Parameter Introduction:" +msgstr "'--additional-config' 参数介绍:" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:472 +msgid "" +"**\"enable_weight_nz_layout\"**: Whether to convert quantized weights to " +"NZ format to accelerate matrix multiplication." 
+msgstr "**\"enable_weight_nz_layout\"**:是否将量化权重转换为 NZ 格式以加速矩阵乘法。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:473 +msgid "" +"**\"enable_prefill_optimizations\"**: Whether to enable DeepSeek models' " +"prefill optimizations.
" +msgstr "**\"enable_prefill_optimizations\"**:是否启用 DeepSeek 模型的预填充优化。
" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:476 +msgid "Enable MTP Add the following command to your configurations." +msgstr "启用 MTP 在您的配置中添加以下命令。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:483 +msgid "Recommended Configuration Example" +msgstr "推荐配置示例" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:485 +msgid "" +"For example, if the average input length is 3.5k, and the output length " +"is 1.1k, the context length is 16k, the max length of the input dataset " +"is 7K. In this scenario, we give a recommended configuration for " +"distributed DP server with high EP. Here we use 4 nodes for prefill and 4" +" nodes for decode." +msgstr "例如,如果平均输入长度为 3.5k,输出长度为 1.1k,上下文长度为 16k,输入数据集的最大长度为 7K。在此场景下,我们为具有高 EP 的分布式数据并行服务器提供了一个推荐配置。这里我们使用 4 个节点进行预填充,4 个节点进行解码。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "node" +msgstr "节点" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "DP" +msgstr "数据并行" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "TP" +msgstr "张量并行" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "EP" +msgstr "专家并行" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "max-model-len" +msgstr "最大模型长度" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "max-num-batched-tokens" +msgstr "最大批处理令牌数" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "max-num-seqs" +msgstr "最大序列数" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "gpu-memory-utilization" +msgstr "GPU内存利用率" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "prefill" +msgstr "预填充" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "2" +msgstr "2" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "8" +msgstr "8" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "16" +msgstr "16" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "17000" +msgstr "17000" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "16384" +msgstr "16384" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "4" +msgstr "4" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "0.9" +msgstr "0.9" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "decode" +msgstr "解码" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "64" +msgstr "64" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "1" +msgstr "1" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "256" +msgstr "256" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:282 +msgid "28" +msgstr "28" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:493 +msgid "" +"Note that these configurations are not related to optimization. You need " +"to adjust these parameters based on actual scenarios." +msgstr "请注意,这些配置与优化无关。您需要根据实际场景调整这些参数。" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:496 +msgid "FAQ" +msgstr "常见问题" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:498 +msgid "1. Prefiller nodes need to warm up" +msgstr "1. 
预填充节点需要预热" + +#: ../../source/user_guide/feature_guide/large_scale_ep.md:500 +msgid "" +"Since the computation of some NPU operators requires several rounds of " +"warm-up to achieve best performance, we recommend preheating the service " +"with some requests before conducting performance tests to achieve the " +"best end-to-end throughput." +msgstr "由于部分NPU算子的计算需要经过数轮预热才能达到最佳性能,我们建议在进行性能测试前,先用一些请求预热服务,以达到最佳的端到端吞吐量。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po new file mode 100644 index 00000000..0f135a35 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po @@ -0,0 +1,185 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:1 +msgid "Layer Sharding Linear Guide" +msgstr "层分片线性算子指南" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:5 +msgid "" +"**Layer Shard Linear** is a memory-optimization feature designed for " +"large language model (LLM) inference. It addresses the high memory " +"pressure caused by **repeated linear operators across many layers** that " +"share identical structure but have distinct weights." +msgstr "" +"**层分片线性算子** 是一项为大语言模型推理设计的内存优化功能。它旨在解决由**跨越多层的重复线性算子**所引起的高内存压力,这些算子结构相同但权重不同。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:7 +msgid "" +"Instead of replicating all weights on every device, **Layer Shard Linear " +"shards the weights of a \"series\" of such operators across the NPU " +"devices in a communication group**:" +msgstr "" +"与在每个设备上复制所有权重不同,**层分片线性算子将此类算子的一个\"系列\"的权重分片到通信组内的NPU设备上**:" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:9 +msgid "" +"The **i-th layer's linear weight** is stored **only on device `i % K`**, " +"where `K` is the number of devices in the group." +msgstr "" +"**第 i 层的线性权重** **仅存储在设备 `i % K` 上**,其中 `K` 是组内的设备数量。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:10 +msgid "" +"Other devices hold a lightweight **shared dummy tensor** during " +"initialization and fetch the real weight **on-demand** via asynchronous " +"broadcast during the forward pass." +msgstr "" +"其他设备在初始化期间持有一个轻量级的**共享虚拟张量**,并在前向传播期间通过异步广播**按需**获取真实权重。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:12 +msgid "" +"As illustrated in the figure below, this design enables broadcast to " +"reach weights: while the current layer (e.g., MLA or MOE) is being " +"computed, the system **asynchronously broadcasts the next layer's " +"weight** in the background. Because the attention computation in the MLA " +"module is sufficiently latency-bound, the weight transfer for `o_proj` is" +" **fully overlapped with computation**, making the communication " +"**latency-free from the perspective of end-to-end inference**." 
+msgstr "" +"如下图所示,这种设计使得广播能够触及权重:在当前层(例如MLA或MOE)进行计算时,系统在后台**异步广播下一层的权重**。由于MLA模块中的注意力计算是充分延迟受限的,`o_proj`的权重传输**与计算完全重叠**,使得从端到端推理的角度看,通信**没有额外延迟**。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:14 +msgid "" +"This approach **preserves exact computational semantics** while " +"**significantly reducing NPU memory footprint**, especially critical for:" +msgstr "" +"这种方法**保持了精确的计算语义**,同时**显著减少了NPU内存占用**,这对于以下情况尤其关键:" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:16 +msgid "Extremely deep architectures (e.g., DeepSeek-V3/R1 with 61 layers);" +msgstr "极深的架构(例如,具有61层的DeepSeek-V3/R1);" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:17 +msgid "" +"Models using **[DSA-CP](https://github.com/vllm-project/vllm-" +"ascend/pull/4702)** or **[FlashComm2](https://github.com/vllm-project" +"/vllm-ascend/pull/4188)**, where the full `O` (output) projection matrix " +"must reside in memory per layer;" +msgstr "" +"使用 **[DSA-CP](https://github.com/vllm-project/vllm-ascend/pull/4702)** 或 **[FlashComm2](https://github.com/vllm-project/vllm-ascend/pull/4188)** 的模型,其中完整的`O`(输出)投影矩阵必须驻留在每层的内存中;" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:18 +msgid "" +"Scenarios where **attention computation latency fully overlaps** (hides) " +"the communication cost of weight broadcasting." +msgstr "**注意力计算延迟完全覆盖(隐藏)**权重广播通信成本的场景。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:22 +msgid "Flowchart" +msgstr "流程图" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:24 +msgid "![layer shard](./images/layer_sharding.png)" +msgstr "![层分片](./images/layer_sharding.png)" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:24 +msgid "layer shard" +msgstr "层分片" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:26 +msgid "" +"**Figure.** Layer Shard Linear workflow: weights are sharded by layer " +"across devices (top), and during forward execution (bottom), asynchronous" +" broadcast **pre-fetches** the next layer's weight while the current " +"layer computes—enabling **zero-overhead** weight loading." +msgstr "" +"**图.** 层分片线性算子工作流程:权重按层分片到各设备(顶部),在前向执行期间(底部),异步广播**预取**下一层的权重,同时当前层进行计算——实现**零开销**的权重加载。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:30 +msgid "Getting Started" +msgstr "快速开始" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:32 +msgid "" +"To enable **Layer Shard Linear**, specify the target linear layers using " +"the `--additional-config` argument when launching your inference job. For" +" example, to shard the `o_proj` and `q_b_proj` layers, use:" +msgstr "" +"要启用**层分片线性算子**,请在启动推理作业时使用 `--additional-config` 参数指定目标线性层。例如,要对 `o_proj` 和 `q_b_proj` 层进行分片,请使用:" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:40 +msgid "" +"**Restriction** In PD-disaggregated deployments, Layer Sharding can only " +"be enabled on the **P node** with `kv_role=\"kv_producer\"`. " +"`kv_role=\"kv_consumer\"` and `kv_role=\"kv_both\"` are not supported." 
+msgstr "" +"**限制** 在PD解耦部署中,层分片只能在 `kv_role=\"kv_producer\"` 的 **P节点** 上启用。不支持 `kv_role=\"kv_consumer\"` 和 `kv_role=\"kv_both\"`。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:46 +msgid "Supported Scenarios" +msgstr "支持场景" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:48 +msgid "This feature delivers the greatest benefit in the following cases:" +msgstr "此功能在以下情况下能带来最大收益:" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:50 +msgid "FlashComm2-enabled" +msgstr "启用FlashComm2" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:52 +msgid "" +"When using [FlashComm2](https://github.com/vllm-project/vllm-" +"ascend/pull/4188), the full output projection (`o_proj`) matrix must be " +"resident in memory for each layer. Layer sharding significantly reduces " +"memory pressure by distributing these weights across devices." +msgstr "" +"当使用 [FlashComm2](https://github.com/vllm-project/vllm-ascend/pull/4188) 时,完整的输出投影(`o_proj`)矩阵必须驻留在每层的内存中。层分片通过将这些权重分布到各设备上,显著降低了内存压力。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:54 +#: ../../source/user_guide/feature_guide/layer_sharding.md:71 +msgid "**Example configuration:**" +msgstr "**配置示例:**" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:65 +msgid "DSA-CP-enabled" +msgstr "启用DSA-CP" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:67 +msgid "" +"With [DSA-CP](https://github.com/vllm-project/vllm-ascend/pull/4702), " +"both `q_b_proj` and `o_proj` layers require large weight matrices to be " +"stored per layer. Sharding these layers across NPUs helps fit extremely " +"deep models (e.g., 61-layer architectures) into limited device memory." +msgstr "" +"使用 [DSA-CP](https://github.com/vllm-project/vllm-ascend/pull/4702) 时,`q_b_proj` 和 `o_proj` 层都需要每层存储大型权重矩阵。将这些层分片到多个NPU上有助于将极深的模型(例如,61层架构)装入有限的设备内存中。" + +#: ../../source/user_guide/feature_guide/layer_sharding.md:69 +msgid "" +"In PD-disaggregated deployments, this mode is supported only on the **P " +"node** with `kv_role=\"kv_producer\"`." +msgstr "" +"在PD解耦部署中,此模式仅在 `kv_role=\"kv_producer\"` 的 **P节点** 上受支持。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lmcache_ascend_deployment.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lmcache_ascend_deployment.po new file mode 100644 index 00000000..fe113440 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lmcache_ascend_deployment.po @@ -0,0 +1,100 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:1 +msgid "LMCache-Ascend Deployment Guide" +msgstr "LMCache-Ascend 部署指南" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:5 +msgid "" +"LMCache-Ascend is a community maintained plugin for running LMCache on " +"the Ascend NPU." +msgstr "LMCache-Ascend 是一个社区维护的插件,用于在昇腾 NPU 上运行 LMCache。" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:7 +msgid "" +"We provide a simple deployment guide here. For further info about " +"deployment notes, please refer to [LMCache-Ascend " +"doc](https://github.com/LMCache/LMCache-Ascend/blob/main/README.md)" +msgstr "本文提供一份简明的部署指南。关于部署的更多详细信息,请参阅 [LMCache-Ascend 文档](https://github.com/LMCache/LMCache-Ascend/blob/main/README.md)。" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:9 +msgid "Getting Started" +msgstr "快速开始" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:11 +msgid "Clone LMCache-Ascend Repo" +msgstr "克隆 LMCache-Ascend 仓库" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:13 +msgid "" +"Our repo contains a kvcache ops submodule for ease of maintenance, " +"therefore we recommend cloning the repo with submodules." +msgstr "我们的仓库包含一个 kvcache 算子子模块以便于维护,因此我们建议克隆包含子模块的仓库。" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:20 +msgid "Docker" +msgstr "Docker" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:27 +msgid "Once that is built, run it with the following cmd" +msgstr "构建完成后,使用以下命令运行" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:53 +msgid "Manual Installation" +msgstr "手动安装" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:55 +msgid "" +"Assuming your working directory is ```/workspace``` and vllm/vllm-ascend " +"have already been installed." +msgstr "假设您的工作目录是 ```/workspace``` 且 vllm/vllm-ascend 已安装。" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:57 +msgid "Install LMCache Repo" +msgstr "安装 LMCache 仓库" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:63 +msgid "Install LMCache-Ascend Repo" +msgstr "安装 LMCache-Ascend 仓库" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:70 +msgid "Usage" +msgstr "使用方法" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:72 +msgid "" +"We introduce a dynamic KVConnector via LMCacheAscendConnectorV1Dynamic, " +"therefore LMCache-Ascend Connector can be used via the kv transfer config" +" in the two following setting." 
+msgstr "我们通过 LMCacheAscendConnectorV1Dynamic 引入了一个动态 KVConnector,因此 LMCache-Ascend 连接器可以通过以下两种场景下的 kv 传输配置来使用。" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:74 +msgid "Online serving" +msgstr "在线服务" + +#: ../../source/user_guide/feature_guide/lmcache_ascend_deployment.md:87 +msgid "Offline" +msgstr "离线" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po index 957ec173..1fc98d4d 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po @@ -4,55 +4,102 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/feature_guide/lora.md:1 +#: ../../source/user_guide/feature_guide/lora.md:1 msgid "LoRA Adapters Guide" msgstr "LoRA 适配器指南" -#: ../../user_guide/feature_guide/lora.md:3 +#: ../../source/user_guide/feature_guide/lora.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/lora.md:5 msgid "" -"Like vLLM, vllm-ascend supports LoRA as well. The usage and more details can" -" be found in [vLLM official " +"Like vLLM, vllm-ascend supports LoRA as well. The usage and more details " +"can be found in [vLLM official " "document](https://docs.vllm.ai/en/latest/features/lora.html)." msgstr "" "与 vLLM 类似,vllm-ascend 也支持 LoRA。用法及更多详情可参见 [vLLM " "官方文档](https://docs.vllm.ai/en/latest/features/lora.html)。" -#: ../../user_guide/feature_guide/lora.md:5 +#: ../../source/user_guide/feature_guide/lora.md:7 msgid "" -"You can also refer to " -"[this](https://docs.vllm.ai/en/latest/models/supported_models.html#list-of-" -"text-only-language-models) to find which models support LoRA in vLLM." +"You can refer to [Supported " +"Models](https://docs.vllm.ai/en/latest/models/supported_models.html#list-" +"of-text-only-language-models) to find which models support LoRA in vLLM." msgstr "" -"你也可以参考[这个链接](https://docs.vllm.ai/en/latest/models/supported_models.html#list-" -"of-text-only-language-models)来查找哪些模型在 vLLM 中支持 LoRA。" +"你可以参考[支持的模型](https://docs.vllm.ai/en/latest/models/supported_models.html#list-of-text-only-language-models)来查找 vLLM 中哪些模型支持 LoRA。" -#: ../../user_guide/feature_guide/lora.md:7 -msgid "Tips" -msgstr "提示" - -#: ../../user_guide/feature_guide/lora.md:8 +#: ../../source/user_guide/feature_guide/lora.md:9 msgid "" -"If you fail to run vllm-ascend with LoRA, you may follow [this " -"instruction](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/feature_guide/graph_mode.html#fallback-" -"to-eager-mode) to disable graph mode and try again." -msgstr "" -"如果你在使用 LoRA 运行 vllm-ascend 时失败,可以按照[此说明](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/feature_guide/graph_mode.html#fallback-" -"to-eager-mode)禁用图模式后再重试。" +"You can run LoRA with ACLGraph mode now. 
Please refer to [Graph Mode " +"Guide](./graph_mode.md) for a better LoRA performance." +msgstr "你现在可以在 ACLGraph 模式下运行 LoRA。请参考[图模式指南](./graph_mode.md)以获得更好的 LoRA 性能。" + +#: ../../source/user_guide/feature_guide/lora.md:11 +msgid "Address for downloading models:" +msgstr "模型下载地址:" + +#: ../../source/user_guide/feature_guide/lora.md:13 +msgid "" +"base model: " +msgstr "基础模型:" + +#: ../../source/user_guide/feature_guide/lora.md:14 +msgid "" +"lora model: " +msgstr "LoRA 模型:" + +#: ../../source/user_guide/feature_guide/lora.md:16 +msgid "Example" +msgstr "示例" + +#: ../../source/user_guide/feature_guide/lora.md:18 +msgid "" +"We provide a simple LoRA example here, which enables the ACLGraph mode by" +" default." +msgstr "我们在此提供了一个简单的 LoRA 示例,该示例默认启用 ACLGraph 模式。" + +#: ../../source/user_guide/feature_guide/lora.md:26 +msgid "Custom LoRA Operators" +msgstr "自定义 LoRA 算子" + +#: ../../source/user_guide/feature_guide/lora.md:28 +msgid "" +"We have implemented LoRA-related AscendC operators, such as bgmv_shrink, " +"bgmv_expand, sgmv_shrink and sgmv_expand. You can find them under the " +"\"csrc/kernels\" directory of [vllm-ascend repo](https://github.com/vllm-" +"project/vllm-ascend.git)." +msgstr "我们已经实现了与 LoRA 相关的 AscendC 算子,例如 bgmv_shrink、bgmv_expand、sgmv_shrink 和 sgmv_expand。你可以在 [vllm-ascend 代码库](https://github.com/vllm-project/vllm-ascend.git) 的 \"csrc/kernels\" 目录下找到它们。" + +#~ msgid "Tips" +#~ msgstr "提示" + +#~ msgid "" +#~ "If you fail to run vllm-ascend " +#~ "with LoRA, you may follow [this " +#~ "instruction](https://vllm-" +#~ "ascend.readthedocs.io/en/latest/user_guide/feature_guide/graph_mode.html" +#~ "#fallback-to-eager-mode) to disable " +#~ "graph mode and try again." +#~ msgstr "" +#~ "如果你在使用 LoRA 运行 vllm-ascend " +#~ "时失败,可以按照[此说明](https://vllm-" +#~ "ascend.readthedocs.io/en/latest/user_guide/feature_guide/graph_mode.html" +#~ "#fallback-to-eager-mode)禁用图模式后再重试。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po new file mode 100644 index 00000000..c5303fec --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po @@ -0,0 +1,341 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/netloader.md:1 +msgid "Netloader Guide" +msgstr "网络加载器指南" + +#: ../../source/user_guide/feature_guide/netloader.md:3 +msgid "" +"This guide provides instructions for using **Netloader** as a weight-" +"loader plugin for acceleration in **vLLM Ascend**." +msgstr "本指南介绍如何将 **Netloader** 用作权重加载器插件,以在 **vLLM Ascend** 中实现加速。" + +#: ../../source/user_guide/feature_guide/netloader.md:7 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/netloader.md:9 +msgid "" +"Netloader leverages high-bandwidth peer-to-peer (P2P) transfers between " +"NPU cards to load model weights. 
It is implemented as a plugin (via the " +"`register_model_loader` API added in vLLM 0.10). The workflow is:" +msgstr "Netloader 利用 NPU 卡之间的高带宽点对点 (P2P) 传输来加载模型权重。它通过插件实现(使用 vLLM 0.10 中添加的 `register_model_loader` API)。工作流程如下:" + +#: ../../source/user_guide/feature_guide/netloader.md:11 +msgid "A **server** preloads a model." +msgstr "**服务器** 预加载模型。" + +#: ../../source/user_guide/feature_guide/netloader.md:12 +msgid "A new **client** instance requests weight transfer." +msgstr "新的 **客户端** 实例请求权重传输。" + +#: ../../source/user_guide/feature_guide/netloader.md:13 +msgid "" +"After validating that the model and partitioning match, the client uses " +"HCCL collective communication (send/recv) to receive weights in the same " +"order as stored in the model." +msgstr "在验证模型和分区匹配后,客户端使用 HCCL 集合通信 (send/recv) 按照模型中存储的相同顺序接收权重。" + +#: ../../source/user_guide/feature_guide/netloader.md:15 +msgid "" +"The server runs alongside normal inference tasks via sub-threads and via " +"`stateless_init_torch_distributed_process_group` in vLLM. The client thus" +" takes over weight initialization without needing to load from storage." +msgstr "服务器通过子线程以及 vLLM 中的 `stateless_init_torch_distributed_process_group` 与常规推理任务并行运行。因此,客户端接管权重初始化,无需从存储加载。" + +#: ../../source/user_guide/feature_guide/netloader.md:17 +msgid "Flowchart" +msgstr "流程图" + +#: ../../source/user_guide/feature_guide/netloader.md:19 +msgid "![netloader flowchart](./images/netloader_flowchart.png)" +msgstr "![网络加载器流程图](./images/netloader_flowchart.png)" + +#: ../../source/user_guide/feature_guide/netloader.md:19 +msgid "netloader flowchart" +msgstr "网络加载器流程图" + +#: ../../source/user_guide/feature_guide/netloader.md:21 +msgid "Timing Diagram" +msgstr "时序图" + +#: ../../source/user_guide/feature_guide/netloader.md:23 +msgid "![netloader timing diagram](./images/netloader_timing_diagram.png)" +msgstr "![网络加载器时序图](./images/netloader_timing_diagram.png)" + +#: ../../source/user_guide/feature_guide/netloader.md:23 +msgid "netloader timing diagram" +msgstr "网络加载器时序图" + +#: ../../source/user_guide/feature_guide/netloader.md:25 +msgid "Application Scenarios" +msgstr "应用场景" + +#: ../../source/user_guide/feature_guide/netloader.md:27 +msgid "" +"**Reduce startup latency**: By reusing already loaded weights and " +"transferring them directly between NPU cards, Netloader cuts down model " +"loading time versus conventional remote/local pull strategies." +msgstr "**减少启动延迟**:通过重用已加载的权重并在 NPU 卡之间直接传输,Netloader 相比传统的远程/本地拉取策略,缩短了模型加载时间。" + +#: ../../source/user_guide/feature_guide/netloader.md:28 +msgid "" +"**Relieve network & storage load**: Avoid repeated downloads of weight " +"files from remote repositories, thus reducing pressure on central storage" +" and network traffic." +msgstr "**减轻网络和存储负载**:避免从远程仓库重复下载权重文件,从而减轻中心存储和网络流量的压力。" + +#: ../../source/user_guide/feature_guide/netloader.md:29 +msgid "" +"**Improve resource utilization & lower cost**: Faster loading allows less" +" reliance on standby compute nodes; resources can be scaled up/down more " +"flexibly." +msgstr "**提高资源利用率并降低成本**:更快的加载速度减少了对备用计算节点的依赖;资源可以更灵活地伸缩。" + +#: ../../source/user_guide/feature_guide/netloader.md:30 +msgid "" +"**Enhance business continuity & high availability**: In failure recovery," +" new instances can quickly take over without long downtime, improving " +"system reliability and user experience." 
+msgstr "**增强业务连续性和高可用性**:在故障恢复时,新实例可以快速接管而无需长时间停机,从而提高系统可靠性和用户体验。" + +#: ../../source/user_guide/feature_guide/netloader.md:34 +msgid "Usage" +msgstr "使用方法" + +#: ../../source/user_guide/feature_guide/netloader.md:36 +msgid "" +"To enable Netloader, pass `--load-format=netloader` and provide " +"configuration via `--model-loader-extra-config` (as a JSON string). Below" +" are the supported configuration fields:" +msgstr "要启用 Netloader,请传递 `--load-format=netloader` 并通过 `--model-loader-extra-config`(作为 JSON 字符串)提供配置。以下是支持的配置字段:" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Field Name" +msgstr "字段名" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Type" +msgstr "类型" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Description" +msgstr "描述" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Allowed Values / Notes" +msgstr "允许值 / 备注" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "**SOURCE**" +msgstr "**SOURCE**" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "List" +msgstr "列表" + +#: ../../source/user_guide/feature_guide/netloader.md +#, python-brace-format +msgid "" +"Weight data sources. Each item is a map with `device_id` and `sources`, " +"specifying the rank and its endpoints (IP:port).
Example: " +"`{\"SOURCE\": [{\"device_id\": 0, \"sources\": " +"[\"10.170.22.152:19374\"]}, {\"device_id\": 1, \"sources\": " +"[\"10.170.22.152:11228\"]}]}`
If omitted or empty, fall back to the " +"default loader. The SOURCE here is second priority." +msgstr "权重数据源。每个条目是一个包含 `device_id` 和 `sources` 的映射,指定了 rank 及其端点 (IP:端口)。
示例:`{\"SOURCE\": [{\"device_id\": 0, \"sources\": [\"10.170.22.152:19374\"]}, {\"device_id\": 1, \"sources\": [\"10.170.22.152:11228\"]}]}`
如果省略或为空,则回退到默认加载器。此处的 SOURCE 是第二优先级。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "A list of objects with keys `device_id: int` and `sources: List[str]`" +msgstr "一个对象列表,其键为 `device_id: int` 和 `sources: List[str]`" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "**MODEL**" +msgstr "**MODEL**" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "String" +msgstr "字符串" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "The model name, used to verify consistency between client and server." +msgstr "模型名称,用于验证客户端和服务器之间的一致性。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Defaults to the `--model` argument if not specified." +msgstr "如果未指定,则默认为 `--model` 参数。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "**LISTEN_PORT**" +msgstr "**LISTEN_PORT**" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Integer" +msgstr "整数" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Base port for the server listener." +msgstr "服务器监听器的基础端口。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "" +"The actual port = `LISTEN_PORT + RANK`. If omitted, a random valid port " +"is chosen. Valid range: 1024–65535. If out of range, that server instance" +" won’t open a listener." +msgstr "实际端口 = `LISTEN_PORT + RANK`。如果省略,则选择一个随机有效端口。有效范围:1024–65535。如果超出范围,该服务器实例将不会打开监听器。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "**INT8_CACHE**" +msgstr "**INT8_CACHE**" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Behavior for handling int8 parameters in quantized models." +msgstr "处理量化模型中 int8 参数的行为。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "" +"One of `[\"hbm\", \"dram\", \"no\"]`.
- `hbm`: copy original int8 " +"parameters to high-bandwidth memory (HBM) (may cost a lot of HBM).
-" +" `dram`: copy to DRAM.
- `no`: no special handling (may lead to " +"divergence or unpredictable behavior). Default: `\"no\"`." +msgstr "取值为 `[\"hbm\", \"dram\", \"no\"]` 之一。
- `hbm`:将原始 int8 参数复制到高带宽内存 (HBM)(可能消耗大量 HBM)。
- `dram`:复制到 DRAM。
- `no`:不进行特殊处理(可能导致分歧或不可预测的行为)。默认值:`\"no\"`。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "**INT8_CACHE_NAME**" +msgstr "**INT8_CACHE_NAME**" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Names of parameters to which `INT8_CACHE` is applied (i.e. filtering)." +msgstr "应用 `INT8_CACHE` 的参数名称(即过滤)。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Default: `None` (means no filtering—all parameters)." +msgstr "默认值:`None`(表示不过滤——所有参数)。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "**OUTPUT_PREFIX**" +msgstr "**OUTPUT_PREFIX**" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Prefix for writing per-rank listener address/port files in server mode." +msgstr "在服务器模式下,用于写入每个 rank 监听器地址/端口文件的前缀。" + +#: ../../source/user_guide/feature_guide/netloader.md +#, python-brace-format +msgid "" +"If set, each rank writes to `{OUTPUT_PREFIX}{RANK}.txt` (text), content =" +" `IP:Port`." +msgstr "如果设置,每个 rank 将写入 `{OUTPUT_PREFIX}{RANK}.txt`(文本文件),内容为 `IP:Port`。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "**CONFIG_FILE**" +msgstr "**CONFIG_FILE**" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "Path to a JSON file specifying the above configuration." +msgstr "指定上述配置的 JSON 文件路径。" + +#: ../../source/user_guide/feature_guide/netloader.md +msgid "" +"If provided, the SOURCE inside this file has **first priority** " +"(overrides SOURCE in other configs)." +msgstr "如果提供,此文件内的 SOURCE 具有 **最高优先级**(覆盖其他配置中的 SOURCE)。" + +#: ../../source/user_guide/feature_guide/netloader.md:50 +msgid "Example Commands & Placeholders" +msgstr "示例命令与占位符" + +#: ../../source/user_guide/feature_guide/netloader.md:52 +msgid "Replace parts in `` `<...>` `` before running." +msgstr "运行前替换 `` `<...>` `` 中的部分。" + +#: ../../source/user_guide/feature_guide/netloader.md:54 +msgid "Server" +msgstr "服务器" + +#: ../../source/user_guide/feature_guide/netloader.md:65 +msgid "Client" +msgstr "客户端" + +#: ../../source/user_guide/feature_guide/netloader.md:80 +msgid "Placeholder Descriptions" +msgstr "占位符说明" + +#: ../../source/user_guide/feature_guide/netloader.md:82 +msgid "``: Path to the model file" +msgstr "``:模型文件路径" + +#: ../../source/user_guide/feature_guide/netloader.md:83 +msgid "``: Model name (must match between server & client)" +msgstr "``:模型名称(服务器和客户端之间必须匹配)" + +#: ../../source/user_guide/feature_guide/netloader.md:84 +msgid "``: Base listening port on server" +msgstr "``:服务器上的基础监听端口" + +#: ../../source/user_guide/feature_guide/netloader.md:85 +msgid "" +"`` + ``: IP and port of the Netloader server " +"(from server log)" +msgstr "`` + ``:Netloader 服务器的 IP 和端口(来自服务器日志)" + +#: ../../source/user_guide/feature_guide/netloader.md:86 +msgid "" +"``: Client device ID (must differ from " +"server’s)" +msgstr "``:客户端设备 ID(必须与服务器的不同)" + +#: ../../source/user_guide/feature_guide/netloader.md:87 +msgid "``: Port on which client listens" +msgstr "``:客户端监听的端口" + +#: ../../source/user_guide/feature_guide/netloader.md:89 +msgid "" +"After startup, you can test consistency by issuing inference requests " +"with temperature = 0 and comparing outputs." +msgstr "启动后,您可以通过发送 temperature = 0 的推理请求并比较输出来测试一致性。" + +#: ../../source/user_guide/feature_guide/netloader.md:93 +msgid "Note & Caveats" +msgstr "注意事项与限制" + +#: ../../source/user_guide/feature_guide/netloader.md:95 +msgid "" +"If Netloader is used, **each worker process** must bind a listening port." +" That port may be user-specified or assigned randomly. 
If user-specified," +" ensure it is available." +msgstr "如果使用 Netloader,**每个工作进程** 都必须绑定一个监听端口。该端口可以是用户指定的,也可以是随机分配的。如果是用户指定的,请确保其可用。" + +#: ../../source/user_guide/feature_guide/netloader.md:96 +msgid "" +"Netloader requires extra HBM memory to establish HCCL connections (i.e. " +"`HCCL_BUFFERSIZE`, default ~200 MB). Users should reserve sufficient " +"capacity (e.g. via `--gpu-memory-utilization`)." +msgstr "Netloader 需要额外的 HBM 内存来建立 HCCL 连接(即 `HCCL_BUFFERSIZE`,默认约 200 MB)。用户应预留足够的容量(例如通过 `--gpu-memory-utilization`)。" + +#: ../../source/user_guide/feature_guide/netloader.md:97 +msgid "" +"It is recommended to set `VLLM_SLEEP_WHEN_IDLE=1` to mitigate unstable or" +" slow connections/transmissions. Related info: [vLLM Issue " +"#16660](https://github.com/vllm-project/vllm/issues/16660), [vLLM PR " +"#16226](https://github.com/vllm-project/vllm/pull/16226)." +msgstr "建议设置 `VLLM_SLEEP_WHEN_IDLE=1` 以缓解不稳定或缓慢的连接/传输。相关信息:[vLLM Issue #16660](https://github.com/vllm-project/vllm/issues/16660), [vLLM PR #16226](https://github.com/vllm-project/vllm/pull/16226)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/npugraph_ex.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/npugraph_ex.po new file mode 100644 index 00000000..b01f0a5e --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/npugraph_ex.po @@ -0,0 +1,61 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/npugraph_ex.md:1 +msgid "Npugraph_ex" +msgstr "Npugraph_ex" + +#: ../../source/user_guide/feature_guide/npugraph_ex.md:3 +msgid "Introduction" +msgstr "简介" + +#: ../../source/user_guide/feature_guide/npugraph_ex.md:5 +msgid "" +"As introduced in the [RFC](https://github.com/vllm-project/vllm-" +"ascend/issues/4715), this is a simple ACLGraph graph mode acceleration " +"solution based on Fx graphs." +msgstr "" +"如 [RFC](https://github.com/vllm-project/vllm-ascend/issues/4715) 中所述,这是一个基于 Fx 图的简单 ACLGraph 图模式加速解决方案。" + +#: ../../source/user_guide/feature_guide/npugraph_ex.md:7 +msgid "Using npugraph_ex" +msgstr "使用 npugraph_ex" + +#: ../../source/user_guide/feature_guide/npugraph_ex.md:9 +msgid "" +"Npugraph_ex will be enabled by default in the future, Take Qwen series " +"models as an example to show how to configure it." 
+msgstr "Npugraph_ex 将在未来默认启用,以 Qwen 系列模型为例展示如何配置。" + +#: ../../source/user_guide/feature_guide/npugraph_ex.md:11 +msgid "Offline example:" +msgstr "离线示例:" + +#: ../../source/user_guide/feature_guide/npugraph_ex.md:28 +msgid "Online example:" +msgstr "在线示例:" + +#: ../../source/user_guide/feature_guide/npugraph_ex.md:35 +msgid "" +"You can find more details about " +"[npugraph_ex](https://www.hiascend.com/document/detail/zh/Pytorch/730/modthirdparty/torchairuseguide/torchair_00021.html)" +msgstr "" +"您可以在 [npugraph_ex](https://www.hiascend.com/document/detail/zh/Pytorch/730/modthirdparty/torchairuseguide/torchair_00021.html) 找到更多详细信息。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po index d942cfbf..bac7c1f5 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po @@ -4,180 +4,201 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/feature_guide/quantization.md:1 +#: ../../source/user_guide/feature_guide/quantization.md:1 msgid "Quantization Guide" msgstr "量化指南" -#: ../../user_guide/feature_guide/quantization.md:3 +#: ../../source/user_guide/feature_guide/quantization.md:3 msgid "" -"Model quantization is a technique that reduces the size and computational " -"requirements of a model by lowering the data precision of the weights and " -"activation values in the model, thereby saving the memory and improving the " -"inference speed." +"Model quantization is a technique that reduces model size and " +"computational overhead by lowering the numerical precision of weights and" +" activations, thereby saving memory and improving inference speed." msgstr "模型量化是一种通过降低模型中权重和激活值的数据精度,从而减少模型大小和计算需求的技术,这样可以节省内存并提高推理速度。" -#: ../../user_guide/feature_guide/quantization.md:5 +#: ../../source/user_guide/feature_guide/quantization.md:5 msgid "" -"Since 0.9.0rc2 version, quantization feature is experimentally supported in " -"vLLM Ascend. Users can enable quantization feature by specifying " -"`--quantization ascend`. Currently, only Qwen, DeepSeek series models are " -"well tested. We’ll support more quantization algorithm and models in the " -"future." -msgstr "" -"自 0.9.0rc2 版本起,vLLM Ascend 实验性地支持量化特性。用户可以通过指定 `--quantization ascend` " -"启用量化功能。目前,只有 Qwen、DeepSeek 系列模型经过了充分测试。未来我们将支持更多的量化算法和模型。" +"`vLLM Ascend` supports multiple quantization methods. This guide provides" +" instructions for using different quantization tools and running " +"quantized models on vLLM Ascend." 
+msgstr "`vLLM Ascend` 支持多种量化方法。本指南提供了使用不同量化工具以及在 vLLM Ascend 上运行量化模型的说明。" -#: ../../user_guide/feature_guide/quantization.md:7 -msgid "Install modelslim" -msgstr "安装 modelslim" +#: ../../source/user_guide/feature_guide/quantization.md:7 +msgid "**Note**" +msgstr "**注意**" -#: ../../user_guide/feature_guide/quantization.md:9 +#: ../../source/user_guide/feature_guide/quantization.md:9 +msgid "" +"You can choose to convert the model yourself or use the quantized model " +"we uploaded. See . Before you quantize a model, ensure sufficient RAM is " +"available." +msgstr "您可以选择自行转换模型,或使用我们上传的量化模型。请参阅 。在对模型进行量化之前,请确保有足够的可用内存。" + +#: ../../source/user_guide/feature_guide/quantization.md:13 +msgid "Quantization Tools" +msgstr "量化工具" + +#: ../../source/user_guide/feature_guide/quantization.md:15 +msgid "" +"vLLM Ascend supports models quantized by two main tools: `ModelSlim` and " +"`LLM-Compressor`." +msgstr "vLLM Ascend 支持由两种主要工具量化的模型:`ModelSlim` 和 `LLM-Compressor`。" + +#: ../../source/user_guide/feature_guide/quantization.md:17 +msgid "1. ModelSlim (Recommended)" +msgstr "1. ModelSlim (推荐)" + +#: ../../source/user_guide/feature_guide/quantization.md:19 msgid "" -"To quantize a model, users should install " "[ModelSlim](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/README.md)" -" which is the Ascend compression and acceleration tool. It is an affinity-" -"based compression tool designed for acceleration, using compression as its " -"core technology and built upon the Ascend platform." -msgstr "" -"要对模型进行量化,用户应安装[ModelSlim](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/README.md),这是昇腾的压缩与加速工具。它是一种基于亲和性的压缩工具,专为加速设计,以压缩为核心技术,并基于昇腾平台构建。" +" is an Ascend-friendly compression tool focused on acceleration, using " +"compression techniques, and built for Ascend hardware. It includes a " +"series of inference optimization technologies such as quantization and " +"compression, aiming to accelerate large language dense models, MoE " +"models, multimodal understanding models, multimodal generation models, " +"etc." +msgstr "[ModelSlim](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/README.md) 是一款面向昇腾硬件的压缩工具,专注于加速,采用压缩技术构建。它包含一系列推理优化技术,如量化和压缩,旨在加速大型语言密集模型、MoE 模型、多模态理解模型、多模态生成模型等。" -#: ../../user_guide/feature_guide/quantization.md:11 +#: ../../source/user_guide/feature_guide/quantization.md:21 +#: ../../source/user_guide/feature_guide/quantization.md:67 +msgid "Installation" +msgstr "安装" + +#: ../../source/user_guide/feature_guide/quantization.md:23 msgid "" -"Currently, only the specific tag [modelslim-" -"VLLM-8.1.RC1.b020_001](https://gitcode.com/Ascend/msit/blob/modelslim-" -"VLLM-8.1.RC1.b020_001/msmodelslim/README.md) of modelslim works with vLLM " -"Ascend. Please do not install other version until modelslim master version " -"is available for vLLM Ascend in the future." 
-msgstr "" -"目前,只有 modelslim 的特定标签 [modelslim-" -"VLLM-8.1.RC1.b020_001](https://gitcode.com/Ascend/msit/blob/modelslim-" -"VLLM-8.1.RC1.b020_001/msmodelslim/README.md) 支持 vLLM Ascend。在未来 modelslim " -"的主版本支持 vLLM Ascend 之前,请不要安装其他版本。" +"To use ModelSlim for model quantization, install it from its [Git " +"repository](https://gitcode.com/Ascend/msit):" +msgstr "要使用 ModelSlim 进行模型量化,请从其 [Git 仓库](https://gitcode.com/Ascend/msit) 安装:" -#: ../../user_guide/feature_guide/quantization.md:13 -msgid "Install modelslim:" -msgstr "安装 modelslim:" +#: ../../source/user_guide/feature_guide/quantization.md:34 +#: ../../source/user_guide/feature_guide/quantization.md:73 +msgid "Model Quantization" +msgstr "模型量化" -#: ../../user_guide/feature_guide/quantization.md:21 -msgid "Quantize model" -msgstr "量化模型" - -#: ../../user_guide/feature_guide/quantization.md:23 -#, python-format +#: ../../source/user_guide/feature_guide/quantization.md:36 msgid "" -"Take [DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-" -"ai/DeepSeek-V2-Lite) as an example, you just need to download the model, and" -" then execute the convert command. The command is shown below. More info can" -" be found in modelslim doc [deepseek w8a8 dynamic quantization " -"docs](https://gitcode.com/Ascend/msit/blob/modelslim-" -"VLLM-8.1.RC1.b020_001/msmodelslim/example/DeepSeek/README.md#deepseek-v2-w8a8-dynamic%E9%87%8F%E5%8C%96)." -msgstr "" -"以 [DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-" -"ai/DeepSeek-V2-Lite) 为例,你只需要下载模型,然后执行转换命令。命令如下所示。更多信息可参考 modelslim 文档 " -"[deepseek w8a8 动态量化文档](https://gitcode.com/Ascend/msit/blob/modelslim-" -"VLLM-8.1.RC1.b020_001/msmodelslim/example/DeepSeek/README.md#deepseek-v2-w8a8-dynamic%E9%87%8F%E5%8C%96)。" +"The following example shows how to generate W8A8 quantized weights for " +"the [Qwen3-MoE " +"model](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/example/Qwen3-MOE/README.md)." +msgstr "以下示例展示了如何为 [Qwen3-MoE 模型](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/example/Qwen3-MOE/README.md) 生成 W8A8 量化权重。" -#: ../../user_guide/feature_guide/quantization.md:32 +#: ../../source/user_guide/feature_guide/quantization.md:38 +msgid "**Quantization Script:**" +msgstr "**量化脚本:**" + +#: ../../source/user_guide/feature_guide/quantization.md:59 msgid "" -"You can also download the quantized model that we uploaded. Please note that" -" these weights should be used for test only. For example, " -"https://www.modelscope.cn/models/vllm-ascend/DeepSeek-V2-Lite-W8A8" -msgstr "" -"你也可以下载我们上传的量化模型。请注意,这些权重仅应用于测试。例如:https://www.modelscope.cn/models/vllm-" -"ascend/DeepSeek-V2-Lite-W8A8" +"After quantization completes, the output directory will contain the " +"quantized model files." +msgstr "量化完成后,输出目录将包含量化后的模型文件。" -#: ../../user_guide/feature_guide/quantization.md:35 -msgid "Once convert action is done, there are two important files generated." -msgstr "转换操作完成后,会生成两个重要的文件。" - -#: ../../user_guide/feature_guide/quantization.md:37 +#: ../../source/user_guide/feature_guide/quantization.md:61 msgid "" -"[config.json](https://www.modelscope.cn/models/vllm-" -"ascend/DeepSeek-V2-Lite-W8A8/file/view/master/config.json?status=1). Please " -"make sure that there is no `quantization_config` field in it." -msgstr "" -"[config.json](https://www.modelscope.cn/models/vllm-" -"ascend/DeepSeek-V2-Lite-W8A8/file/view/master/config.json?status=1)。请确保其中没有 " -"`quantization_config` 字段。" +"For more examples, refer to the [official " +"examples](https://gitcode.com/Ascend/msit/tree/master/msmodelslim/example)." 
+msgstr "更多示例,请参考 [官方示例](https://gitcode.com/Ascend/msit/tree/master/msmodelslim/example)。" -#: ../../user_guide/feature_guide/quantization.md:39 +#: ../../source/user_guide/feature_guide/quantization.md:63 +msgid "2. LLM-Compressor" +msgstr "2. LLM-Compressor" + +#: ../../source/user_guide/feature_guide/quantization.md:65 msgid "" -"[quant_model_description.json](https://www.modelscope.cn/models/vllm-" -"ascend/DeepSeek-V2-Lite-W8A8/file/view/master/quant_model_description.json?status=1)." -" All the converted weights info are recorded in this file." -msgstr "" -"[quant_model_description.json](https://www.modelscope.cn/models/vllm-" -"ascend/DeepSeek-V2-Lite-W8A8/file/view/master/quant_model_description.json?status=1)。所有被转换的权重信息都记录在该文件中。" +"[LLM-Compressor](https://github.com/vllm-project/llm-compressor) is a " +"unified compressed model library for faster vLLM inference." +msgstr "[LLM-Compressor](https://github.com/vllm-project/llm-compressor) 是一个统一的压缩模型库,用于加速 vLLM 推理。" -#: ../../user_guide/feature_guide/quantization.md:41 -msgid "Here is the full converted model files:" -msgstr "以下是完整转换后的模型文件:" +#: ../../source/user_guide/feature_guide/quantization.md:75 +msgid "`LLM-Compressor` provides various quantization scheme examples." +msgstr "`LLM-Compressor` 提供了多种量化方案的示例。" -#: ../../user_guide/feature_guide/quantization.md:60 -msgid "Run the model" -msgstr "运行模型" +#: ../../source/user_guide/feature_guide/quantization.md:77 +msgid "Dense Quantization" +msgstr "密集模型量化" -#: ../../user_guide/feature_guide/quantization.md:62 +#: ../../source/user_guide/feature_guide/quantization.md:79 +msgid "An example to generate W8A8 dynamic quantized weights for dense model:" +msgstr "为密集模型生成 W8A8 动态量化权重的示例:" + +#: ../../source/user_guide/feature_guide/quantization.md:89 +msgid "MoE Quantization" +msgstr "MoE 模型量化" + +#: ../../source/user_guide/feature_guide/quantization.md:91 +msgid "An example to generate W8A8 dynamic quantized weights for MoE model:" +msgstr "为 MoE 模型生成 W8A8 动态量化权重的示例:" + +#: ../../source/user_guide/feature_guide/quantization.md:101 msgid "" -"Now, you can run the quantized models with vLLM Ascend. Here is the example " -"for online and offline inference." -msgstr "现在,你可以使用 vLLM Ascend 运行量化模型。下面是在线和离线推理的示例。" +"For more content, refer to the [official examples](https://github.com" +"/vllm-project/llm-compressor/tree/main/examples)." +msgstr "更多内容,请参考 [官方示例](https://github.com/vllm-project/llm-compressor/tree/main/examples)。" -#: ../../user_guide/feature_guide/quantization.md:64 -msgid "Offline inference" +#: ../../source/user_guide/feature_guide/quantization.md:103 +msgid "" +"Currently supported quantization types by LLM-Compressor: `W8A8` and " +"`W8A8_DYNAMIC`." +msgstr "LLM-Compressor 当前支持的量化类型:`W8A8` 和 `W8A8_DYNAMIC`。" + +#: ../../source/user_guide/feature_guide/quantization.md:105 +msgid "Running Quantized Models" +msgstr "运行量化模型" + +#: ../../source/user_guide/feature_guide/quantization.md:107 +msgid "" +"Once you have a quantized model which is generated by **ModelSlim**, you " +"can use vLLM Ascend for inference by specifying the `--quantization " +"ascend` parameter to enable quantization features, while for models " +"quantized by **LLM-Compressor**, do not need to add this parameter." 
+msgstr "一旦您拥有由 **ModelSlim** 生成的量化模型,您可以通过指定 `--quantization ascend` 参数来使用 vLLM Ascend 进行推理以启用量化功能。而对于由 **LLM-Compressor** 量化的模型,则无需添加此参数。" + +#: ../../source/user_guide/feature_guide/quantization.md:109 +msgid "Offline Inference" msgstr "离线推理" -#: ../../user_guide/feature_guide/quantization.md:90 -msgid "Online inference" +#: ../../source/user_guide/feature_guide/quantization.md:143 +msgid "Online Inference" msgstr "在线推理" -#: ../../user_guide/feature_guide/quantization.md:97 -msgid "FAQs" -msgstr "常见问题解答" +#: ../../source/user_guide/feature_guide/quantization.md:158 +msgid "References" +msgstr "参考" -#: ../../user_guide/feature_guide/quantization.md:99 +#: ../../source/user_guide/feature_guide/quantization.md:160 msgid "" -"1. How to solve the KeyError: 'xxx.layers.0.self_attn.q_proj.weight' " -"problem?" -msgstr "1. 如何解决 KeyError: 'xxx.layers.0.self_attn.q_proj.weight' 问题?" +"[ModelSlim " +"Documentation](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/README.md)" +msgstr "[ModelSlim 文档](https://gitcode.com/Ascend/msit/blob/master/msmodelslim/README.md)" -#: ../../user_guide/feature_guide/quantization.md:101 -msgid "" -"First, make sure you specify `ascend` quantization method. Second, check if " -"your model is converted by this `modelslim-VLLM-8.1.RC1.b020_001` modelslim " -"version. Finally, if it still doesn't work, please submit a issue, maybe " -"some new models need to be adapted." -msgstr "" -"首先,请确保你指定了 `ascend` 量化方法。其次,检查你的模型是否由 `modelslim-VLLM-8.1.RC1.b020_001` 这个 " -"modelslim 版本转换。如果仍然无法使用,请提交一个 issue,可能有一些新模型需要适配。" +#: ../../source/user_guide/feature_guide/quantization.md:161 +msgid "[LLM-Compressor GitHub](https://github.com/vllm-project/llm-compressor)" +msgstr "[LLM-Compressor GitHub](https://github.com/vllm-project/llm-compressor)" -#: ../../user_guide/feature_guide/quantization.md:104 -msgid "" -"2. How to solve the error \"Could not locate the " -"configuration_deepseek.py\"?" -msgstr "2. 如何解决“无法找到 configuration_deepseek.py”错误?" +#: ../../source/user_guide/feature_guide/quantization.md:162 +msgid "[vLLM Quantization Guide](https://docs.vllm.ai/en/latest/quantization/)" +msgstr "[vLLM 量化指南](https://docs.vllm.ai/en/latest/quantization/)" -#: ../../user_guide/feature_guide/quantization.md:106 -msgid "" -"Please convert DeepSeek series models using `modelslim-" -"VLLM-8.1.RC1.b020_001` modelslim, this version has fixed the missing " -"configuration_deepseek.py error." -msgstr "" -"请使用 `modelslim-VLLM-8.1.RC1.b020_001` 的 modelslim 转换 DeepSeek 系列模型,该版本已修复缺少 " -"configuration_deepseek.py 的错误。" +#~ msgid "" +#~ "Please convert DeepSeek series models " +#~ "using `modelslim-VLLM-8.1.RC1.b020_001` modelslim," +#~ " this version has fixed the missing" +#~ " configuration_deepseek.py error." +#~ msgstr "" +#~ "请使用 `modelslim-VLLM-8.1.RC1.b020_001` 版本的 " +#~ "modelslim 转换 DeepSeek 系列模型,该版本已修复缺少 " +#~ "configuration_deepseek.py 文件的错误。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po new file mode 100644 index 00000000..0b128768 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po @@ -0,0 +1,386 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. 
+# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/rfork.md:1 +msgid "RFork Guide" +msgstr "RFork 指南" + +#: ../../source/user_guide/feature_guide/rfork.md:3 +msgid "" +"This guide explains how to use **RFork** as a model-loader plugin in " +"**vLLM Ascend**." +msgstr "本指南介绍如何在 **vLLM Ascend** 中使用 **RFork** 作为模型加载器插件。" + +#: ../../source/user_guide/feature_guide/rfork.md:7 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/rfork.md:9 +msgid "" +"RFork is a warm-start weight loading path for vLLM Ascend. Instead of " +"always reading model weights from storage, a new instance can request a " +"compatible **seed** instance from an external planner, then pull weights " +"directly from that seed through `YuanRong TransferEngine`." +msgstr "RFork 是 vLLM Ascend 的一种热启动权重加载路径。新实例无需总是从存储中读取模型权重,而是可以从外部规划器请求一个兼容的 **种子** 实例,然后通过 `YuanRong TransferEngine` 直接从该种子拉取权重。" + +#: ../../source/user_guide/feature_guide/rfork.md:11 +msgid "The RFork loading flow in the current implementation is:" +msgstr "当前实现中,RFork 的加载流程如下:" + +#: ../../source/user_guide/feature_guide/rfork.md:13 +msgid "vLLM starts with `--load-format rfork`." +msgstr "vLLM 以 `--load-format rfork` 参数启动。" + +#: ../../source/user_guide/feature_guide/rfork.md:14 +msgid "" +"RFork builds a **seed key** from the model identity and deployment " +"topology." +msgstr "RFork 根据模型标识和部署拓扑构建一个 **种子键**。" + +#: ../../source/user_guide/feature_guide/rfork.md:15 +msgid "RFork asks the planner for an available seed matching that key." +msgstr "RFork 向规划器请求一个与该键匹配的可用种子。" + +#: ../../source/user_guide/feature_guide/rfork.md:16 +msgid "" +"If a seed is returned, the new instance initializes the model structure " +"on its local NPU, registers local weight memory, fetches the remote " +"transfer-engine metadata from the seed, and performs batch weight " +"transfer into local parameter buffers." +msgstr "如果返回了一个种子,新实例将在其本地 NPU 上初始化模型结构,注册本地权重内存,从种子获取远程传输引擎的元数据,并执行批量权重传输到本地参数缓冲区。" + +#: ../../source/user_guide/feature_guide/rfork.md:17 +msgid "" +"If no seed is available, or any step fails, RFork cleans up and falls " +"back to the default loader." +msgstr "如果没有可用种子,或任何步骤失败,RFork 将进行清理并回退到默认加载器。" + +#: ../../source/user_guide/feature_guide/rfork.md:18 +msgid "" +"After the instance finishes loading, it starts a local seed service and " +"periodically reports heartbeat to the planner, so later instances can " +"reuse it." 
+msgstr "实例完成加载后,会启动一个本地种子服务,并定期向规划器发送心跳,以便后续实例可以复用该实例。" + +#: ../../source/user_guide/feature_guide/rfork.md:20 +msgid "Flowchart" +msgstr "流程图" + +#: ../../source/user_guide/feature_guide/rfork.md:22 +msgid "![rfork flowchart](./images/rfork_flowchart.jpg)" +msgstr "![rfork 流程图](./images/rfork_flowchart.jpg)" + +#: ../../source/user_guide/feature_guide/rfork.md:22 +msgid "rfork flowchart" +msgstr "rfork 流程图" + +#: ../../source/user_guide/feature_guide/rfork.md:24 +msgid "Application Scenarios" +msgstr "应用场景" + +#: ../../source/user_guide/feature_guide/rfork.md:26 +msgid "" +"**Scale-out after a first successful load**: The first instance may still" +" load from storage, but later instances with the same deployment identity" +" can reuse it as a seed and shorten startup time." +msgstr "**首次成功加载后的横向扩展**:第一个实例可能仍需从存储加载,但后续具有相同部署标识的实例可以将其作为种子复用,从而缩短启动时间。" + +#: ../../source/user_guide/feature_guide/rfork.md:27 +msgid "" +"**Elastic serving clusters**: Because RFork asks a planner for available " +"seeds, it fits clusters where instances are created and reclaimed " +"dynamically." +msgstr "**弹性服务集群**:由于 RFork 会向规划器请求可用种子,因此它适用于实例动态创建和回收的集群。" + +#: ../../source/user_guide/feature_guide/rfork.md:28 +msgid "" +"**Topology-sensitive deployments**: RFork encodes `kv_role`, `node_rank`," +" `tp_rank`, and optional `draft` role into the seed key, so only " +"topology-compatible instances are matched together." +msgstr "**拓扑敏感的部署**:RFork 将 `kv_role`、`node_rank`、`tp_rank` 以及可选的 `draft` 角色编码到种子键中,因此只有拓扑兼容的实例才会被匹配在一起。" + +#: ../../source/user_guide/feature_guide/rfork.md:32 +msgid "Usage" +msgstr "使用方法" + +#: ../../source/user_guide/feature_guide/rfork.md:34 +msgid "" +"To enable RFork, pass `--load-format rfork` and provide RFork settings " +"through `--model-loader-extra-config` as a JSON string." +msgstr "要启用 RFork,请传递 `--load-format rfork` 参数,并通过 `--model-loader-extra-config` 以 JSON 字符串的形式提供 RFork 设置。" + +#: ../../source/user_guide/feature_guide/rfork.md:36 +msgid "RFork Prerequisites" +msgstr "RFork 先决条件" + +#: ../../source/user_guide/feature_guide/rfork.md:38 +msgid "" +"Install the runtime dependency `YuanRong TransferEngine` on every RFork " +"instance." +msgstr "在每个 RFork 实例上安装运行时依赖 `YuanRong TransferEngine`。" + +#: ../../source/user_guide/feature_guide/rfork.md:39 +msgid "" +"Run a planner service that implements the RFork seed protocol. A simple " +"mock planner script is provided at " +"[`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py)." +msgstr "运行一个实现了 RFork 种子协议的规划器服务。在 [`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py) 提供了一个简单的模拟规划器脚本。" + +#: ../../source/user_guide/feature_guide/rfork.md:41 +msgid "Configuration Fields" +msgstr "配置字段" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Field Name" +msgstr "字段名" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Type" +msgstr "类型" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Description" +msgstr "描述" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Allowed Values / Notes" +msgstr "允许值 / 备注" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "**model_url**" +msgstr "**model_url**" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "String" +msgstr "字符串" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Logical model identifier used to build the RFork seed key." +msgstr "用于构建 RFork 种子键的逻辑模型标识符。" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "" +"Required for RFork transfer. 
Instances that should share seeds must use " +"the same value." +msgstr "RFork 传输所必需。应共享种子的实例必须使用相同的值。" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "**model_deploy_strategy_name**" +msgstr "**model_deploy_strategy_name**" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "" +"Deployment strategy identifier used together with `model_url` to build " +"the seed key." +msgstr "部署策略标识符,与 `model_url` 一起用于构建种子键。" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "**rfork_scheduler_url**" +msgstr "**rfork_scheduler_url**" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "" +"Base URL of the planner service used for seed allocation, release, and " +"heartbeat." +msgstr "用于种子分配、释放和心跳的规划器服务的基础 URL。" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Required for planner-based matching. Example: `http://127.0.0.1:1223`." +msgstr "基于规划器的匹配所必需。示例:`http://127.0.0.1:1223`。" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "**rfork_seed_timeout_sec**" +msgstr "**rfork_seed_timeout_sec**" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Number" +msgstr "数字" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "" +"Timeout for waiting until the local seed HTTP service becomes healthy " +"after startup." +msgstr "启动后等待本地种子 HTTP 服务变为健康状态的超时时间。" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Optional. Default: `30`. Must be greater than `0`." +msgstr "可选。默认值:`30`。必须大于 `0`。" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "**rfork_seed_key_separator**" +msgstr "**rfork_seed_key_separator**" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Separator used when building the RFork seed key string." +msgstr "构建 RFork 种子键字符串时使用的分隔符。" + +#: ../../source/user_guide/feature_guide/rfork.md +msgid "Optional. Default: `$`. Keep the same value across compatible instances." +msgstr "可选。默认值:`$`。在兼容的实例间保持相同的值。" + +#: ../../source/user_guide/feature_guide/rfork.md:51 +msgid "How RFork Matches Seeds" +msgstr "RFork 如何匹配种子" + +#: ../../source/user_guide/feature_guide/rfork.md:53 +msgid "" +"RFork does not match instances by `model_url` alone. The local seed key " +"is composed from:" +msgstr "RFork 不仅通过 `model_url` 来匹配实例。本地种子键由以下部分组成:" + +#: ../../source/user_guide/feature_guide/rfork.md:55 +msgid "`model_url`" +msgstr "`model_url`" + +#: ../../source/user_guide/feature_guide/rfork.md:56 +msgid "`model_deploy_strategy_name`" +msgstr "`model_deploy_strategy_name`" + +#: ../../source/user_guide/feature_guide/rfork.md:57 +msgid "disaggregation mode derived from `kv_transfer_config.kv_role` or `kv_both`" +msgstr "从 `kv_transfer_config.kv_role` 或 `kv_both` 派生的解耦模式" + +#: ../../source/user_guide/feature_guide/rfork.md:58 +msgid "`node_rank`" +msgstr "`node_rank`" + +#: ../../source/user_guide/feature_guide/rfork.md:59 +msgid "`tp_rank`" +msgstr "`tp_rank`" + +#: ../../source/user_guide/feature_guide/rfork.md:60 +msgid "optional `draft` suffix when the worker runs as a draft model" +msgstr "当工作器作为草稿模型运行时,可选的 `draft` 后缀" + +#: ../../source/user_guide/feature_guide/rfork.md:62 +msgid "" +"This means two instances must agree on both model identity and deployment" +" topology before the planner will treat them as interchangeable seeds." +msgstr "这意味着两个实例必须在模型标识和部署拓扑上都达成一致,规划器才会将它们视为可互换的种子。" + +#: ../../source/user_guide/feature_guide/rfork.md:66 +msgid "Example Commands & Placeholders" +msgstr "示例命令与占位符" + +#: ../../source/user_guide/feature_guide/rfork.md:68 +msgid "Replace parts in `` `<...>` `` before running." 
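The guide's own server/client commands use `vllm serve` with `--load-format rfork` and a JSON `--model-loader-extra-config`, and are not reproduced in this catalog. Below is a hedged sketch of the same configuration through the offline `LLM` API; all values are placeholders, and the field names follow the configuration table above.

```python
from vllm import LLM

# Every instance in the same deployment uses the same identity fields, so the
# planner can match a later instance to an earlier one as a seed.
llm = LLM(
    model="/path/to/model",
    load_format="rfork",
    model_loader_extra_config={
        "model_url": "demo-model",                       # shared logical model identity
        "model_deploy_strategy_name": "tp2-default",     # shared deployment strategy name
        "rfork_scheduler_url": "http://127.0.0.1:1223",  # planner base URL
        "rfork_seed_timeout_sec": 30,                    # optional; default 30
    },
)
```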
+msgstr "运行前替换 `` `<...>` `` 中的部分。" + +#: ../../source/user_guide/feature_guide/rfork.md:70 +msgid "1. Install YuanRong TransferEngine" +msgstr "1. 安装 YuanRong TransferEngine" + +#: ../../source/user_guide/feature_guide/rfork.md:76 +msgid "2. Start the Planner" +msgstr "2. 启动规划器" + +#: ../../source/user_guide/feature_guide/rfork.md:78 +msgid "" +"A simple planner implementation is provided at " +"[`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py)." +msgstr "在 [`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py) 提供了一个简单的规划器实现。" + +#: ../../source/user_guide/feature_guide/rfork.md:86 +msgid "3. Start vLLM Instances" +msgstr "3. 启动 vLLM 实例" + +#: ../../source/user_guide/feature_guide/rfork.md:88 +msgid "" +"Use the same RFork startup command for both the first instance and later " +"instances in the same deployment." +msgstr "对于同一部署中的第一个实例和后续实例,使用相同的 RFork 启动命令。" + +#: ../../source/user_guide/feature_guide/rfork.md:90 +msgid "" +"For the first instance, the planner usually has no compatible seed yet, " +"so RFork falls back to the default loader. After loading finishes, that " +"instance starts its local seed service and reports itself to the planner." +msgstr "对于第一个实例,规划器通常还没有兼容的种子,因此 RFork 会回退到默认加载器。加载完成后,该实例会启动其本地种子服务,并向规划器报告自身。" + +#: ../../source/user_guide/feature_guide/rfork.md:92 +msgid "" +"For later instances, if the planner can allocate a compatible seed, RFork" +" will try to transfer weights from the existing seed instance before " +"falling back to the default loader." +msgstr "对于后续实例,如果规划器能分配一个兼容的种子,RFork 将尝试从现有的种子实例传输权重,然后再回退到默认加载器。" + +#: ../../source/user_guide/feature_guide/rfork.md:109 +msgid "Placeholder Descriptions" +msgstr "占位符说明" + +#: ../../source/user_guide/feature_guide/rfork.md:111 +msgid "``: Model path or model identifier passed to `vllm serve`." +msgstr "``:传递给 `vllm serve` 的模型路径或模型标识符。" + +#: ../../source/user_guide/feature_guide/rfork.md:112 +msgid "``: Service name exposed by vLLM." +msgstr "``:vLLM 暴露的服务名称。" + +#: ../../source/user_guide/feature_guide/rfork.md:113 +msgid "``: IP address or hostname of the RFork planner." +msgstr "``:RFork 规划器的 IP 地址或主机名。" + +#: ../../source/user_guide/feature_guide/rfork.md:114 +msgid "``: Listening port of the RFork planner." +msgstr "``:RFork 规划器的监听端口。" + +#: ../../source/user_guide/feature_guide/rfork.md:115 +msgid "" +"``: Stable model identity string used to build the RFork seed " +"key." +msgstr "``:用于构建 RFork 种子键的稳定模型标识字符串。" + +#: ../../source/user_guide/feature_guide/rfork.md:116 +msgid "" +"``: Stable deployment-strategy name used to build the " +"RFork seed key." +msgstr "``:用于构建 RFork 种子键的稳定部署策略名称。" + +#: ../../source/user_guide/feature_guide/rfork.md:117 +msgid "``: Serving port of the vLLM instance being started." +msgstr "``:正在启动的 vLLM 实例的服务端口。" + +#: ../../source/user_guide/feature_guide/rfork.md:121 +msgid "Note & Caveats" +msgstr "注意事项与限制" + +#: ../../source/user_guide/feature_guide/rfork.md:123 +msgid "" +"RFork requires `YuanRong TransferEngine` at runtime. If the package is " +"missing, RFork cannot initialize the transfer backend." +msgstr "RFork 在运行时需要 `YuanRong TransferEngine`。如果缺少该软件包,RFork 将无法初始化传输后端。" + +#: ../../source/user_guide/feature_guide/rfork.md:124 +msgid "" +"If RFORK is used, **each worker process** must bind a listening port. " +"That port is assigned randomly." 
+msgstr "" +"如果使用 RFORK,**每个工作进程**都必须绑定一个监听端口。该端口是随机分配的。" + +#: ../../source/user_guide/feature_guide/rfork.md:125 +msgid "" +"The example " +"[`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py) is only" +" a simple mock implementation. If you need stronger scheduling, capacity " +"management, or production-grade availability behavior, implement your own" +" planner based on the RFork seed protocol." +msgstr "" +"示例 [`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py) 仅是一个简单的模拟实现。如果您需要更强大的调度、容量管理或生产级可用性行为,请基于 RFork 种子协议实现您自己的规划器。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sequence_parallelism.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sequence_parallelism.po new file mode 100644 index 00000000..87491564 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sequence_parallelism.po @@ -0,0 +1,435 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:1 +msgid "Sequence Parallelism" +msgstr "序列并行" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:3 +msgid "What is Sequence Parallelism" +msgstr "什么是序列并行" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:5 +msgid "" +"Sequence Parallelism (SP) was first introduced in " +"[Megatron](https://arxiv.org/pdf/2205.05198), with the original intention" +" of reducing training activation memory. The core modification was " +"changing `Allreduce->LayerNorm` to `ReduceScatter->LayerNorm->Allgather`." +" This technique was later applied to inference by vllm. It should be " +"noted that splitting Allreduce into ReduceScatter and Allgather does not " +"inherently bring performance benefits; it reduces the computation load of" +" LayerNorm, but this gain is minimal. The real benefits of SP come from:" +msgstr "" +"序列并行(Sequence Parallelism,SP)最初由 " +"[Megatron](https://arxiv.org/pdf/2205.05198) 提出,其初衷是减少训练时的激活内存。核心改动是将 " +"`Allreduce->LayerNorm` 改为 `ReduceScatter->LayerNorm->Allgather`。这项技术后来被 vllm " +"应用于推理。需要注意的是,将 Allreduce 拆分为 ReduceScatter 和 Allgather 本身并不会带来性能收益;它减少了 LayerNorm " +"的计算量,但这种收益微乎其微。SP 的真正收益来自:" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:7 +msgid "" +"LLM inference deployment often uses quantization. Taking INT8 " +"quantization commonly used on NPUs as an example, after LayerNorm, a " +"Quant operator quantizes the hidden states from BF16 to INT8. The " +"communication volume of Allgather is halved, and the time consumption is " +"almost halved." 
+msgstr "" +"LLM 推理部署常使用量化。以 NPU 上常用的 INT8 量化为例,在 LayerNorm 之后,Quant 算子会将隐藏状态从 BF16 量化为 INT8。此时 " +"Allgather 的通信量减半,耗时也几乎减半。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:8 +msgid "" +"ReduceScatter and Allgather can be fused with the preceding and following" +" Matmul operations respectively into communication-computation parallel " +"operators, reducing latency." +msgstr "ReduceScatter 和 Allgather 可以分别与前后 Matmul 操作融合为通信-计算并行算子,从而降低延迟。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:10 +msgid "How to Use" +msgstr "如何使用" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:12 +msgid "" +"Currently, vllm-ascend has implemented Sequence Parallelism for VL-class " +"models based on the Inductor pass. It can be enabled in the following " +"way:" +msgstr "目前,vllm-ascend 已基于 Inductor pass 为 VL 类模型实现了序列并行。可以通过以下方式启用:" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:20 +msgid "" +"`\"enable_sp\"`: This is the switch for SP. Since SP relies on graph " +"mode, it is not supported in eager mode." +msgstr "`\"enable_sp\"`:这是 SP 的开关。由于 SP 依赖于图模式,因此在 eager 模式下不受支持。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:21 +#, python-brace-format +msgid "" +"`sp_min_token_num` (from upstream vllm's `pass_config`): Based on our " +"experiments, when the number of tokens is small (empirical value is less " +"than 1000), SP can actually bring negative impact. This is because when " +"the communication volume is small, the fixed overhead of the " +"communication operator becomes the dominant factor. SP will only take " +"effect when `num_tokens >= sp_min_token_num`. **The default value is 1000" +" on Ascend, which generally does not need to be modified.** To customize," +" use `--compilation-config '{\"pass_config\": {\"enable_sp\": true, " +"\"sp_min_token_num\": 512}}'`. The value will be appended into " +"`compile_ranges_split_points`, which splits the graph compilation range " +"and checks whether the pass is applicable per range." +msgstr "" +"`sp_min_token_num`(来自上游 vllm 的 `pass_config`):根据我们的实验,当 token 数量较少(经验值小于 1000)时,SP " +"实际上可能带来负面影响。这是因为当通信量较小时,通信算子的固定开销成为主导因素。SP 仅在 `num_tokens >= sp_min_token_num` " +"时生效。**在 Ascend 上默认值为 1000,通常无需修改。** 如需自定义,请使用 `--compilation-config '{\"pass_config\": " +"{\"enable_sp\": true, \"sp_min_token_num\": 512}}'`。该值将被追加到 `compile_ranges_split_points` " +"中,用于分割图编译范围,并检查每个范围是否适用该 pass。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:23 +msgid "" +"Without modifying `sp_min_token_num`, the simplest way and recommended " +"way to enable SP is:" +msgstr "在不修改 `sp_min_token_num` 的情况下,启用 SP 最简单且推荐的方式是:" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:31 +msgid "Difference Between SP and Flash Comm V1" +msgstr "SP 与 Flash Comm V1 的区别" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:33 +msgid "" +"[Flash Comm V1 (FC1)](https://gitcode.com/ascend-tribe/ascend-inference-" +"cluster/blob/main/FlashComm/ascend-inference-cluster-flashcomm.md) is an " +"enhanced version of Sequence Parallelism developed based on NPU. 
The " +"enhancements include:" +msgstr "" +"[Flash Comm V1 (FC1)](https://gitcode.com/ascend-tribe/ascend-inference-" +"cluster/blob/main/FlashComm/ascend-inference-cluster-flashcomm.md) 是基于 NPU " +"开发的序列并行增强版本。其增强包括:" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:35 +msgid "" +"For models using the MLA structure, Allgather is postponed until after " +"QKV projection, further reducing communication volume." +msgstr "对于使用 MLA 结构的模型,Allgather 被推迟到 QKV 投影之后,进一步减少了通信量。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:36 +msgid "" +"For MoE models, Allgather is postponed until after Gating+DynamicQuant, " +"also aiming to reduce communication volume." +msgstr "对于 MoE 模型,Allgather 被推迟到 Gating+DynamicQuant 之后,同样旨在减少通信量。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:38 +msgid "" +"FC1 is a unique optimization in vllm-ascend, currently implemented based " +"on Custom OP, but it is difficult to support VL-class models (reasons " +"detailed in [[RFC]: support sequence parallelism by " +"pass](https://github.com/vllm-project/vllm-ascend/issues/5712) ). " +"Therefore, currently FC1 and SP are complementary." +msgstr "" +"FC1 是 vllm-ascend 中独特的优化,目前基于 Custom OP 实现,但难以支持 VL 类模型(原因详见 [[RFC]: support " +"sequence parallelism by " +"pass](https://github.com/vllm-project/vllm-ascend/issues/5712))。因此,目前 FC1 和 SP " +"是互补的。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:40 +msgid "Support Matrix" +msgstr "支持矩阵" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:42 +msgid "Without Quantization" +msgstr "无量化" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "VL + Dense" +msgstr "VL + 稠密" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "VL + MoE" +msgstr "VL + MoE" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "non-VL + Dense" +msgstr "非 VL + 稠密" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "non-VL + MoE" +msgstr "非 VL + MoE" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "graph" +msgstr "图模式" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "x" +msgstr "x" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "Flash Comm V1" +msgstr "Flash Comm V1" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "eager/graph" +msgstr "eager/图模式" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:49 +msgid "With Quantization" +msgstr "带量化" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:51 +msgid "SP currently does not support quantization and is under adaptation." +msgstr "SP 目前不支持量化,正在适配中。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:58 +msgid "Pass Design" +msgstr "Pass 设计" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:60 +msgid "" +"When SP is enabled, the following passes run in order: " +"`SequenceParallelismPass` then `SequenceParallelismMoePass`." 
+msgstr "启用 SP 时,以下 pass 按顺序运行:先 `SequenceParallelismPass`,然后 `SequenceParallelismMoePass`。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:62 +msgid "SequenceParallelismPass" +msgstr "SequenceParallelismPass" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:64 +msgid "" +"Runs `NoOpEliminationPass` first to eliminate redundant view-like " +"operations, then applies AllReduce-based patterns:" +msgstr "首先运行 `NoOpEliminationPass` 以消除冗余的类视图操作,然后应用基于 AllReduce 的模式:" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "Pattern" +msgstr "模式" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "Match" +msgstr "匹配" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "Replacement" +msgstr "替换" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`MiddleAllReduceRMSNormPattern`" +msgstr "`MiddleAllReduceRMSNormPattern`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`all_reduce` + `layernorm`" +msgstr "`all_reduce` + `layernorm`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`reduce_scatter` + `layernorm` + `all_gather`" +msgstr "`reduce_scatter` + `layernorm` + `all_gather`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`LastAllReduceRMSNormPattern`" +msgstr "`LastAllReduceRMSNormPattern`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "Same (last layer, no residual)" +msgstr "相同(最后一层,无残差)" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "Same" +msgstr "相同" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`Qwen3VLMiddleAllReduceRMSNormPattern`" +msgstr "`Qwen3VLMiddleAllReduceRMSNormPattern`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`all_reduce` + add + `layernorm`" +msgstr "`all_reduce` + add + `layernorm`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "" +"`reduce_scatter` + chunk(`deepstack_input_embeds`) + add + `layernorm` + " +"`all_gather`" +msgstr "`reduce_scatter` + chunk(`deepstack_input_embeds`) + add + `layernorm` + `all_gather`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:72 +msgid "" +"**Why Qwen3 VL needs special handling by " +"Qwen3VLMiddleAllReduceRMSNormPattern**" +msgstr "**为什么 Qwen3 VL 需要 Qwen3VLMiddleAllReduceRMSNormPattern 特殊处理**" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:74 +msgid "" +"Qwen3-VL middle layers insert an extra add between `all_reduce` and " +"`layernorm`: `hidden_states=hidden_states + deepstack_input_embeds`. " +"Under SP, `hidden_states` (i.e., `input`) is reduced-scattered to shape " +"`[seq_len/tp, hidden]` per rank, while `deepstack_input_embeds` comes " +"from the vision/deepstack path and stays full-sequence `[seq_len, " +"hidden]` (typically replicated across TP ranks). Simply doing " +"`reduce_scatter(input) + deepstack_input_embeds` would cause a shape " +"mismatch. The fix is to chunk `deepstack_input_embeds` by `tp_size` so " +"each rank uses `add(reduce_scatter, " +"chunk(deepstack_input_embeds)[tp_rank])`, keeping shapes consistent " +"before `layernorm` and `all_gather`." 
+msgstr "" +"Qwen3-VL 的中间层在 `all_reduce` 和 `layernorm` 之间插入了一个额外的 add 操作:`hidden_states=hidden_states " +"+ deepstack_input_embeds`。在 SP 下,`hidden_states`(即 `input`)被 reduce-scatter " +"到每个 rank 的形状 `[seq_len/tp, hidden]`,而 `deepstack_input_embeds` 来自视觉/deepstack " +"路径,并保持全序列形状 `[seq_len, hidden]`(通常在 TP rank 间复制)。简单地执行 `reduce_scatter(input) + " +"deepstack_input_embeds` 会导致形状不匹配。解决方法是按 `tp_size` 对 `deepstack_input_embeds` 进行 " +"chunk,使得每个 rank 使用 `add(reduce_scatter, chunk(deepstack_input_embeds)[tp_rank])`,从而在 " +"`layernorm` 和 `all_gather` 之前保持形状一致。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:77 +msgid "SequenceParallelismMoePass" +msgstr "SequenceParallelismMoePass" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:79 +msgid "" +"After `SequenceParallelismPass` applies, the MoE model computation graph " +"looks like:" +msgstr "应用 `SequenceParallelismPass` 后,MoE 模型的计算图如下所示:" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:81 +msgid "![AllGather EP computation graph](../../assets/sp_moe.png)" +msgstr "![AllGather EP 计算图](../../assets/sp_moe.png)" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:81 +msgid "AllGather EP computation graph" +msgstr "AllGather EP 计算图" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:83 +msgid "**Overview**" +msgstr "**概述**" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:85 +msgid "" +"**Postponing allgather**: Under SP, `residual` is chunked by tensor " +"parallelism. This causes a shape mismatch between hidden states and " +"residual in the next layer's layernorm: hidden states are gathered (full " +"sequence) while residual remains chunked. The fix is to move `all_gather`" +" to *after* layernorm so that layernorm operates on consistent shapes per" +" rank. `MiddleLayerAllgatherAddRMSNormPattern`, " +"`LastLayerAllgatherRMSNormPattern`, and " +"`Qwen3VLMiddleLayerAllgatherAddRMSNormPattern` are designed for this " +"purpose, each handling different layer and structure variants (see the " +"table below)." +msgstr "" +"**推迟 allgather**:在 SP 下,`residual` 被张量并行切分。这导致下一层 layernorm 中隐藏状态和残差的形状不匹配:隐藏状态被聚集(全序列),而残差保持切分状态。解决方法是将 " +"`all_gather` 移动到 layernorm *之后*,使得 layernorm 在每个 rank 上操作一致的形状。`MiddleLayerAllgatherAddRMSNormPattern`、`LastLayerAllgatherRMSNormPattern` " +"和 `Qwen3VLMiddleLayerAllgatherAddRMSNormPattern` 就是为此设计的,每个处理不同的层和结构变体(见下表)。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:87 +msgid "" +"**AllGatherChunkNoOp cleanup**: When MoE SP is enabled, vllm introduces a" +" `sequence_parallel_chunk` op (corresponding to `sp_chunk` in the " +"diagram). Together with the preceding `all_gather`, the pair forms a " +"redundant no-op (all_gather gathers, then chunk re-splits). " +"`AllGatherChunkNoOpPattern` replaces this pair with identity to eliminate" +" the redundant communication and computation." 
+msgstr "" +"**AllGatherChunkNoOp 清理**:当启用 MoE SP 时,vllm 引入了一个 `sequence_parallel_chunk` 算子(对应图中的 " +"`sp_chunk`)。它与前面的 `all_gather` 一起形成了一个冗余的无操作(all_gather 聚集,然后 chunk 重新分割)。`AllGatherChunkNoOpPattern` " +"将这对操作替换为恒等操作,以消除冗余的通信和计算。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:89 +msgid "**Pattern details:**" +msgstr "**模式详情:**" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`MiddleLayerAllgatherAddRMSNormPattern`" +msgstr "`MiddleLayerAllgatherAddRMSNormPattern`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`all_gather` + slice + `layernorm`" +msgstr "`all_gather` + slice + `layernorm`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`layernorm` + `all_gather`" +msgstr "`layernorm` + `all_gather`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`LastLayerAllgatherRMSNormPattern`" +msgstr "`LastLayerAllgatherRMSNormPattern`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`Qwen3VLMiddleLayerAllgatherAddRMSNormPattern`" +msgstr "`Qwen3VLMiddleLayerAllgatherAddRMSNormPattern`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`all_gather` + slice + add + `layernorm`" +msgstr "`all_gather` + slice + add + `layernorm`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "add(chunk) + `layernorm` + `all_gather`" +msgstr "add(chunk) + `layernorm` + `all_gather`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`AllGatherChunkNoOpPattern`" +msgstr "`AllGatherChunkNoOpPattern`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "`all_gather` + `sequence_parallel_chunk_impl`" +msgstr "`all_gather` + `sequence_parallel_chunk_impl`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md +msgid "identity (no-op)" +msgstr "恒等操作(无操作)" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:98 +msgid "FAQ" +msgstr "常见问题" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:100 +msgid "Q1: Is SP enabled by default?" +msgstr "Q1: SP 是否默认启用?" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:102 +msgid "" +"No, SP is not enabled by default. SP is currently in the experimental " +"stage and will be enabled by default in the future." +msgstr "不,SP 默认未启用。SP 目前处于实验阶段,未来将默认启用。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:104 +msgid "The processing flow of `enable_sp` in the code is:" +msgstr "代码中 `enable_sp` 的处理流程如下:" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:106 +msgid "In `pass_config`, `enable_sp` and `sp_min_token_num` default to `None`" +msgstr "在 `pass_config` 中,`enable_sp` 和 `sp_min_token_num` 默认为 `None`" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:107 +msgid "" +"`NPUPlatform.apply_config_platform_defaults`: If `enable_sp` is `True` " +"and `sp_min_token_num` is None, set default `sp_min_token_num` (1000 for " +"Dense models, 1 for MoE models)" +msgstr "" +"`NPUPlatform.apply_config_platform_defaults`:如果 `enable_sp` 为 `True` 且 " +"`sp_min_token_num` 为 None,则设置默认的 `sp_min_token_num`(Dense 模型为 1000,MoE 模型为 1)" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:108 +msgid "" +"`VllmConfig._apply_optimization_level_defaults`: `enable_sp` is set to " +"`True` for dense models." 
+msgstr "" +"`VllmConfig._apply_optimization_level_defaults`:对于 Dense 模型,`enable_sp` 被设置为 `True`。" + +#: ../../source/user_guide/feature_guide/sequence_parallelism.md:109 +msgid "" +"`VllmConfig.__post_init__`: If `sp_min_token_num` is still `None`, then " +"`enable_sp` is set to `False`" +msgstr "" +"`VllmConfig.__post_init__`:如果 `sp_min_token_num` 仍为 `None`,则 `enable_sp` 被设置为 `False`" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po index a3bd1b21..96181e52 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po @@ -4,153 +4,139 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/feature_guide/sleep_mode.md:1 +#: ../../source/user_guide/feature_guide/sleep_mode.md:1 msgid "Sleep Mode Guide" msgstr "睡眠模式指南" -#: ../../user_guide/feature_guide/sleep_mode.md:3 +#: ../../source/user_guide/feature_guide/sleep_mode.md:3 msgid "Overview" msgstr "概述" -#: ../../user_guide/feature_guide/sleep_mode.md:5 +#: ../../source/user_guide/feature_guide/sleep_mode.md:5 msgid "" -"Sleep Mode is an API designed to offload model weights and discard KV cache " -"from NPU memory. This functionality is essential for reinforcement learning " -"(RL) post-training workloads, particularly in online algorithms such as PPO," -" GRPO, or DPO. During training, the policy model typically performs auto-" -"regressive generation using inference engines like vLLM, followed by forward" -" and backward passes for optimization." +"Sleep Mode is an API designed to offload model weights and discard KV " +"cache from NPU memory. This functionality is essential for reinforcement " +"learning (RL) post-training workloads, particularly in online algorithms " +"such as PPO, GRPO, or DPO. During training, the policy model typically " +"performs autoregressive generation using inference engines like vLLM, " +"followed by forward and backward passes for optimization." msgstr "" -"Sleep Mode 是一个用于卸载模型权重并清除 NPU 内存中 KV 缓存的 API。此功能对于强化学习(RL)后训练任务尤其重要,特别是在 " -"PPO、GRPO 或 DPO 等在线算法中。在训练过程中,策略模型通常会使用像 vLLM " -"这样的推理引擎进行自回归生成,然后进行前向和反向传播以进行优化。" +"睡眠模式是一个专为从NPU内存中卸载模型权重并丢弃KV缓存而设计的API。此功能对于强化学习(RL)后训练工作负载至关重要,特别是在PPO、GRPO或DPO等在线算法中。在训练期间,策略模型通常使用vLLM等推理引擎执行自回归生成,随后进行前向和反向传播以完成优化。" -#: ../../user_guide/feature_guide/sleep_mode.md:7 +#: ../../source/user_guide/feature_guide/sleep_mode.md:7 msgid "" "Since the generation and training phases may employ different model " -"parallelism strategies, it becomes crucial to free KV cache and even offload" -" model parameters stored within vLLM during training. This ensures efficient" -" memory utilization and avoids resource contention on the NPU." 
+"parallelism strategies, it becomes crucial to free KV cache and even " +"offload model parameters stored within vLLM during training. This ensures" +" efficient memory utilization and avoids resource contention on the NPU." msgstr "" -"由于生成和训练阶段可能采用不同的模型并行策略,因此在训练过程中及时释放 KV 缓存,甚至卸载存储在 vLLM " -"内的模型参数变得至关重要。这可以确保内存的高效利用,并避免 NPU 上的资源争用。" +"由于生成阶段和训练阶段可能采用不同的模型并行策略,因此在训练期间释放KV缓存,甚至卸载存储在vLLM中的模型参数变得至关重要。这确保了高效的内存利用,并避免了NPU上的资源争用。" -#: ../../user_guide/feature_guide/sleep_mode.md:10 +#: ../../source/user_guide/feature_guide/sleep_mode.md:9 msgid "Getting started" -msgstr "快速上手" +msgstr "快速入门" -#: ../../user_guide/feature_guide/sleep_mode.md:12 +#: ../../source/user_guide/feature_guide/sleep_mode.md:11 #, python-brace-format msgid "" -"With `enable_sleep_mode=True`, the way we manage memory(malloc, free) in " -"vllm will under a specific memory pool, during loading model and initialize " -"kv_caches, we tag the memory as a map: `{\"weight\": data, \"kv_cache\": " -"data}`." +"With `enable_sleep_mode=True`, the way we manage memory (malloc, free) in" +" vllm is under a specific memory pool. During model loading and KV cache " +"initialization, we tag the memory as a map: `{\"weight\": data, " +"\"kv_cache\": data}`." msgstr "" -"当 `enable_sleep_mode=True` 时,我们在 vllm 中管理内存(malloc, " -"free)的方式会在一个特定的内存池下进行,在加载模型和初始化 kv_caches " -"期间,我们会将内存打上标签,组织成一个映射:`{\"weight\": data, \"kv_cache\": data}`。" +"当设置 `enable_sleep_mode=True` 时,我们在vllm中管理内存(分配、释放)的方式将在一个特定的内存池下进行。在模型加载和KV缓存初始化期间,我们将内存标记为一个映射:`{\"weight\": data, \"kv_cache\": data}`。" -#: ../../user_guide/feature_guide/sleep_mode.md:14 +#: ../../source/user_guide/feature_guide/sleep_mode.md:13 msgid "" -"The engine(v0/v1) supports two sleep levels to manage memory during idle " -"periods:" -msgstr "该引擎(v0/v1)支持两种睡眠等级,以在空闲期间管理内存:" +"The engine (v0/v1) supports two sleep levels to manage memory during idle" +" periods:" +msgstr "引擎(v0/v1)支持两种睡眠等级,用于在空闲期间管理内存:" -#: ../../user_guide/feature_guide/sleep_mode.md:16 +#: ../../source/user_guide/feature_guide/sleep_mode.md:15 msgid "Level 1 Sleep" msgstr "一级睡眠" -#: ../../user_guide/feature_guide/sleep_mode.md:17 +#: ../../source/user_guide/feature_guide/sleep_mode.md:16 msgid "Action: Offloads model weights and discards the KV cache." -msgstr "操作:卸载模型权重并清除KV缓存。" +msgstr "操作:卸载模型权重并丢弃KV缓存。" -#: ../../user_guide/feature_guide/sleep_mode.md:18 +#: ../../source/user_guide/feature_guide/sleep_mode.md:17 msgid "Memory: Model weights are moved to CPU memory; KV cache is forgotten." -msgstr "内存:模型权重被移动到CPU内存;KV缓存被清除。" +msgstr "内存:模型权重被移至CPU内存;KV缓存被清除。" -#: ../../user_guide/feature_guide/sleep_mode.md:19 +#: ../../source/user_guide/feature_guide/sleep_mode.md:18 msgid "Use Case: Suitable when reusing the same model later." -msgstr "用例:适用于之后需要重复使用同一个模型的情况。" +msgstr "用例:适用于后续需要复用同一模型的情况。" -#: ../../user_guide/feature_guide/sleep_mode.md:20 -msgid "" -"Note: Ensure sufficient CPU memory is available to hold the model weights." -msgstr "注意:请确保有足够的CPU内存来存储模型权重。" +#: ../../source/user_guide/feature_guide/sleep_mode.md:19 +msgid "Note: Ensure sufficient CPU memory is available to hold the model weights." +msgstr "注意:确保有足够的CPU内存来容纳模型权重。" -#: ../../user_guide/feature_guide/sleep_mode.md:22 +#: ../../source/user_guide/feature_guide/sleep_mode.md:21 msgid "Level 2 Sleep" msgstr "二级睡眠" -#: ../../user_guide/feature_guide/sleep_mode.md:23 +#: ../../source/user_guide/feature_guide/sleep_mode.md:22 msgid "Action: Discards both model weights and KV cache." 
msgstr "操作:同时丢弃模型权重和KV缓存。" -#: ../../user_guide/feature_guide/sleep_mode.md:24 -msgid "" -"Memory: The content of both the model weights and kv cache is forgotten." -msgstr "内存:模型权重和kv缓存的内容都会被遗忘。" +#: ../../source/user_guide/feature_guide/sleep_mode.md:23 +msgid "Memory: The content of both the model weights and KV cache is forgotten." +msgstr "内存:模型权重和KV缓存的内容均被清除。" -#: ../../user_guide/feature_guide/sleep_mode.md:25 +#: ../../source/user_guide/feature_guide/sleep_mode.md:24 msgid "" -"Use Case: Ideal when switching to a different model or updating the current " -"one." -msgstr "用例:当切换到不同的模型或更新当前模型时非常理想。" +"Use Case: Ideal when switching to a different model or updating the " +"current one." +msgstr "用例:当需要切换到不同模型或更新当前模型时,此模式非常理想。" -#: ../../user_guide/feature_guide/sleep_mode.md:27 +#: ../../source/user_guide/feature_guide/sleep_mode.md:26 msgid "" "Since this feature uses the low-level API " "[AscendCL](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha002/API/appdevgapi/appdevgapi_07_0000.html)," " in order to use sleep mode, you should follow the [installation " -"guide](https://vllm-ascend.readthedocs.io/en/latest/installation.html) and " -"building from source, if you are using v0.7.3, remember to set `export " -"COMPILE_CUSTOM_KERNELS=1`, for the latest version(v0.9.x+), the environment " -"variable `COMPILE_CUSTOM_KERNELS` will be set 1 by default while building " -"from source." +"guide](https://docs.vllm.ai/projects/ascend/en/latest/installation.html) " +"and build from source. If you are using < v0.12.0rc1, remember to set " +"`export COMPILE_CUSTOM_KERNELS=1`." msgstr "" -"由于此功能使用了底层 API " -"[AscendCL](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha002/API/appdevgapi/appdevgapi_07_0000.html),为了使用休眠模式,你应按照[安装指南](https://vllm-" -"ascend.readthedocs.io/en/latest/installation.html)进行操作,并从源码编译。如果你使用的是 " -"v0.7.3,请记得设置 `export COMPILE_CUSTOM_KERNELS=1` ;对于最新版本(v0.9.x+),在从源码编译时环境变量 " -"`COMPILE_CUSTOM_KERNELS` 默认会被设置为 1。" +"由于此功能使用了底层API " +"[AscendCL](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/82RC1alpha002/API/appdevgapi/appdevgapi_07_0000.html),为了使用睡眠模式,您应遵循[安装指南](https://docs.vllm.ai/projects/ascend/en/latest/installation.html)并从源码构建。如果您使用的版本低于v0.12.0rc1,请记得设置 `export COMPILE_CUSTOM_KERNELS=1`。" -#: ../../user_guide/feature_guide/sleep_mode.md:29 +#: ../../source/user_guide/feature_guide/sleep_mode.md:28 msgid "Usage" msgstr "用法" -#: ../../user_guide/feature_guide/sleep_mode.md:31 +#: ../../source/user_guide/feature_guide/sleep_mode.md:30 msgid "The following is a simple example of how to use sleep mode." -msgstr "以下是如何使用睡眠模式的一个简单示例。" +msgstr "以下是一个如何使用睡眠模式的简单示例。" -#: ../../user_guide/feature_guide/sleep_mode.md:33 -msgid "offline inference:" +#: ../../source/user_guide/feature_guide/sleep_mode.md:32 +msgid "Offline inference:" msgstr "离线推理:" -#: ../../user_guide/feature_guide/sleep_mode.md:72 -msgid "online serving:" +#: ../../source/user_guide/feature_guide/sleep_mode.md:72 +msgid "Online serving:" msgstr "在线服务:" -#: ../../user_guide/feature_guide/sleep_mode.md:74 +#: ../../source/user_guide/feature_guide/sleep_mode.md:74 msgid "" -"Considering there may be a risk of malicious access, please make sure you " -"are under a dev-mode, and explicit specify the develop env: " -"`VLLM_SERVER_DEV_MODE` to expose these endpoints(sleep/wake up)." 
+"Considering there may be a risk of malicious access, please make sure you" +" are under a dev-mode, and explicitly specify the dev environment " +"`VLLM_SERVER_DEV_MODE` to expose these endpoints (sleep/wake up)." msgstr "" -"鉴于可能存在恶意访问的风险,请确保您处于开发模式,并明确指定开发环境:`VLLM_SERVER_DEV_MODE`,以便开放这些端点(sleep/wake" -" up)。" +"考虑到可能存在恶意访问的风险,请确保您处于开发模式,并明确指定开发环境变量 `VLLM_SERVER_DEV_MODE` 以开放这些端点(sleep/wake up)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po new file mode 100644 index 00000000..bd35ac63 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po @@ -0,0 +1,164 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:1 +msgid "Speculative Decoding Guide" +msgstr "推测解码指南" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:3 +msgid "" +"This guide shows how to use Speculative Decoding with vLLM Ascend. " +"Speculative decoding is a technique which improves inter-token latency in" +" memory-bound LLM inference." +msgstr "本指南展示了如何在 vLLM Ascend 中使用推测解码。推测解码是一种技术,用于改善内存受限的 LLM 推理中的令牌间延迟。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:5 +msgid "Speculating by matching n-grams in the prompt" +msgstr "通过匹配提示中的 n-gram 进行推测" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:7 +msgid "" +"The following code configures vLLM Ascend to use speculative decoding " +"where proposals are generated by matching n-grams in the prompt." +msgstr "以下代码配置 vLLM Ascend 使用推测解码,其中候选令牌通过匹配提示中的 n-gram 生成。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:9 +#: ../../source/user_guide/feature_guide/speculative_decoding.md:42 +#: ../../source/user_guide/feature_guide/speculative_decoding.md:127 +msgid "Offline inference" +msgstr "离线推理" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:36 +msgid "Speculating using EAGLE based draft models" +msgstr "使用基于 EAGLE 的草稿模型进行推测" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:38 +msgid "" +"The following code configures vLLM Ascend to use speculative decoding " +"where proposals are generated by an [EAGLE (Extrapolation Algorithm for " +"Greater Language-model Efficiency)](https://arxiv.org/pdf/2401.15077) " +"based draft model." +msgstr "以下代码配置 vLLM Ascend 使用推测解码,其中候选令牌由基于 [EAGLE(用于提升语言模型效率的外推算法)](https://arxiv.org/pdf/2401.15077) 的草稿模型生成。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:40 +msgid "" +"In v0.12.0rc1 of vLLM Ascend, the async scheduler is more stable and " +"ready to be enabled. We have adapted it to support EAGLE, and you can use" +" it by setting `async_scheduling=True` as follows. If you encounter any " +"issues, please feel free to open an issue on GitHub. 
As a workaround, you" +" can disable this feature by unsetting `async_scheduling=True` when " +"initializing the model." +msgstr "在 vLLM Ascend 的 v0.12.0rc1 版本中,异步调度器更加稳定并已准备就绪。我们已使其适配以支持 EAGLE,您可以通过如下设置 `async_scheduling=True` 来使用它。如果您遇到任何问题,请随时在 GitHub 上提交 issue。作为一种变通方案,您可以在初始化模型时不设置 `async_scheduling=True` 来禁用此功能。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:74 +msgid "" +"A few important things to consider when using the EAGLE based draft " +"models:" +msgstr "使用基于 EAGLE 的草稿模型时,需要考虑以下几点重要事项:" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:76 +msgid "" +"The EAGLE draft models available in the [HF repository for EAGLE " +"models](https://huggingface.co/yuhuili) should be loaded and used " +"directly by vLLM. This functionality was added in PR " +"[#4893](https://github.com/vllm-project/vllm-ascend/pull/4893). If you " +"are using a vLLM version released before this pull request was merged, " +"please update to a more recent version." +msgstr "[EAGLE 模型的 HF 仓库](https://huggingface.co/yuhuili) 中可用的 EAGLE 草稿模型应由 vLLM 直接加载和使用。此功能在 PR [#4893](https://github.com/vllm-project/vllm-ascend/pull/4893) 中添加。如果您使用的 vLLM 版本是在此拉取请求合并之前发布的,请更新到较新的版本。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:80 +msgid "" +"The EAGLE based draft models need to be run without tensor parallelism " +"(i.e. draft_tensor_parallel_size is set to 1 in `speculative_config`), " +"although it is possible to run the main model using tensor parallelism " +"(see example above)." +msgstr "基于 EAGLE 的草稿模型需要在没有张量并行的情况下运行(即在 `speculative_config` 中 `draft_tensor_parallel_size` 设置为 1),尽管主模型可以使用张量并行运行(参见上面的示例)。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:84 +msgid "" +"When using EAGLE-3 based draft model, option \"method\" must be set to " +"\"eagle3\". That is, to specify `\"method\": \"eagle3\"` in " +"`speculative_config`." +msgstr "当使用基于 EAGLE-3 的草稿模型时,选项 \"method\" 必须设置为 \"eagle3\"。也就是说,在 `speculative_config` 中指定 `\"method\": \"eagle3\"`。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:87 +msgid "" +"After enabling EAGLE, the main model needs to verify `(1 + K)` tokens " +"generated by the main model and the draft model in one decoding process. " +"And the fullgraph mode will fix the number of tokens during the " +"verification stage, so `cudagraph_capture_sizes` must be a list of " +"capture sizes, where each size is calculated as `n * (K + 1)` for each " +"batch size `n` you want to support. For instance, to support batch sizes " +"from 1 to 4 with `num_speculative_tokens = 4`, `cudagraph_capture_sizes` " +"should be set to `[5, 10, 15, 20]`." +msgstr "启用 EAGLE 后,主模型需要在一个解码过程中验证由主模型和草稿模型生成的 `(1 + K)` 个令牌。并且 fullgraph 模式将在验证阶段固定令牌数量,因此 `cudagraph_capture_sizes` 必须是一个捕获大小列表,其中每个大小计算为 `n * (K + 1)`,`n` 是您希望支持的每个批次大小。例如,要支持批次大小从 1 到 4 且 `num_speculative_tokens = 4`,`cudagraph_capture_sizes` 应设置为 `[5, 10, 15, 20]`。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:92 +msgid "Speculating using MTP speculators" +msgstr "使用 MTP 推测器进行推测" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:94 +msgid "" +"The following code configures vLLM Ascend to use speculative decoding " +"where proposals are generated by MTP (Multi Token Prediction), boosting " +"inference performance by parallelizing the prediction of multiple tokens." 
+" For more information about MTP see " +"[Multi_Token_Prediction](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/Multi_Token_Prediction.html)" +msgstr "以下代码配置 vLLM Ascend 使用推测解码,其中候选令牌由 MTP(多令牌预测)生成,通过并行预测多个令牌来提升推理性能。有关 MTP 的更多信息,请参阅 [Multi_Token_Prediction](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/Multi_Token_Prediction.html)" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:96 +msgid "Online inference" +msgstr "在线推理" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:116 +msgid "Speculating using Suffix Decoding" +msgstr "使用后缀解码进行推测" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:118 +msgid "" +"The following code configures vLLM to use speculative decoding where " +"proposals are generated using Suffix Decoding [(SuffixDecoding: Extreme " +"Speculative Decoding for Emerging AI " +"Applications)](https://arxiv.org/abs/2411.04975)." +msgstr "以下代码配置 vLLM 使用推测解码,其中候选令牌使用后缀解码生成 [(SuffixDecoding: Extreme Speculative Decoding for Emerging AI Applications)](https://arxiv.org/abs/2411.04975)。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:120 +msgid "" +"Like n-gram, Suffix Decoding can generate draft tokens by pattern-" +"matching using the last `n` generated tokens. Unlike n-gram, Suffix " +"Decoding (1) can pattern-match against both the prompt and previous " +"generations, (2) uses frequency counts to propose the most likely " +"continuations, and (3) speculates an adaptive number of tokens for each " +"request at each iteration to get better acceptance rates." +msgstr "与 n-gram 类似,后缀解码可以通过使用最后 `n` 个生成的令牌进行模式匹配来生成草稿令牌。与 n-gram 不同,后缀解码 (1) 可以针对提示和先前生成的内容进行模式匹配,(2) 使用频率计数来提出最可能的延续序列,(3) 在每次迭代中为每个请求推测自适应数量的令牌,以获得更好的接受率。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:122 +msgid "" +"Suffix Decoding can achieve better performance for tasks with high " +"repetition, such as code-editing, agentic loops (e.g. self-reflection, " +"self-consistency), and RL rollouts." +msgstr "后缀解码可以在具有高重复性的任务上实现更好的性能,例如代码编辑、智能体循环(例如自我反思、自我一致性)和 RL 推演。" + +#: ../../source/user_guide/feature_guide/speculative_decoding.md:124 +msgid "" +"[!NOTE] Suffix Decoding requires Arctic Inference. You can install it " +"with `pip install arctic-inference`." +msgstr "[!注意] 后缀解码需要 Arctic Inference。您可以使用 `pip install arctic-inference` 安装它。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po index 636e59db..7c17b50f 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po @@ -4,217 +4,73 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/feature_guide/structured_output.md:1 +#: ../../source/user_guide/feature_guide/structured_output.md:1 msgid "Structured Output Guide" msgstr "结构化输出指南" -#: ../../user_guide/feature_guide/structured_output.md:3 +#: ../../source/user_guide/feature_guide/structured_output.md:3 msgid "Overview" msgstr "概述" -#: ../../user_guide/feature_guide/structured_output.md:5 -msgid "What is Structured Output?" +#: ../../source/user_guide/feature_guide/structured_output.md:5 +msgid "What is structured output?" msgstr "什么是结构化输出?" -#: ../../user_guide/feature_guide/structured_output.md:7 +#: ../../source/user_guide/feature_guide/structured_output.md:7 msgid "" -"LLMs can be unpredictable when you need output in specific formats. Think of" -" asking a model to generate JSON - without guidance, it might produce valid " -"text that breaks JSON specification. **Structured Output (also called Guided" -" Decoding)** enables LLMs to generate outputs that follow a desired " -"structure while preserving the non-deterministic nature of the system." +"LLMs can be unpredictable when you need output in specific formats. Think" +" of asking a model to generate JSON without guidance, it might produce " +"valid text that breaks JSON specification. **Structured Output (also " +"known as Guided Decoding)** enables LLMs to generate outputs that follow " +"a desired structure while preserving the non-deterministic nature of the " +"system." msgstr "" -"当你需要特定格式输出时,大型语言模型(LLMs)可能表现出不可预测性。比如让模型生成 " -"JSON,如果没有指导,模型可能会生成有效的文本,但这些文本却不符合 JSON 规范。**结构化输出(也称为引导解码)** " -"能让大型语言模型生成符合预期结构的输出,同时保留系统的非确定性特性。" +"当您需要特定格式的输出时,大型语言模型(LLMs)的行为可能难以预测。试想一下,在没有指导的情况下要求模型生成" +" JSON,它可能会生成有效的文本,但却破坏了 JSON 规范。**结构化输出(也称为引导解码)** " +"使大型语言模型能够生成符合预期结构的输出,同时保留系统的非确定性特性。" -#: ../../user_guide/feature_guide/structured_output.md:9 +#: ../../source/user_guide/feature_guide/structured_output.md:9 msgid "" -"In simple terms, structured decoding gives LLMs a “template” to follow. " -"Users provide a schema that “influences” the model’s output, ensuring " +"In simple terms, structured decoding gives LLMs a \"template\" to follow." +" Users provide a schema that \"influences\" the model output, ensuring " "compliance with the desired structure." 
-msgstr "简单来说,结构化解码为LLM提供了一个“模板”来遵循。用户提供一个模式来“影响”模型的输出,从而确保输出符合期望的结构。" +msgstr "简而言之,结构化解码为大型语言模型提供了一个需要遵循的“模板”。用户提供一个“影响”模型输出的模式,以确保输出符合期望的结构。" -#: ../../user_guide/feature_guide/structured_output.md:11 +#: ../../source/user_guide/feature_guide/structured_output.md:11 msgid "![structured decoding](./images/structured_output_1.png)" msgstr "![结构化解码](./images/structured_output_1.png)" -#: ../../user_guide/feature_guide/structured_output.md:11 +#: ../../source/user_guide/feature_guide/structured_output.md:11 msgid "structured decoding" msgstr "结构化解码" -#: ../../user_guide/feature_guide/structured_output.md:13 -msgid "Structured Output in vllm-ascend" -msgstr "vllm-ascend 中的结构化输出" +#: ../../source/user_guide/feature_guide/structured_output.md:13 +msgid "Usage in vllm-ascend" +msgstr "在 vllm-ascend 中的使用" -#: ../../user_guide/feature_guide/structured_output.md:15 +#: ../../source/user_guide/feature_guide/structured_output.md:15 msgid "" -"Currently, vllm-ascend supports **xgrammar** and **guidance** backend for " -"structured output with vllm v1 engine." -msgstr "目前,vllm-ascend 支持 vllm v1 引擎的结构化输出,后端包括 **xgrammar** 和 **guidance**。" +"Currently, the usage of structured output feature in vllm-ascend is " +"totally the same as that in vllm." +msgstr "目前,vllm-ascend 中结构化输出功能的使用方式与 vllm 中完全相同。" -#: ../../user_guide/feature_guide/structured_output.md:17 +#: ../../source/user_guide/feature_guide/structured_output.md:17 msgid "" -"XGrammar introduces a new technique that batch constrained decoding via " -"pushdown automaton (PDA). You can think of a PDA as a “collection of FSMs, " -"and each FSM represents a context-free grammar (CFG).” One significant " -"advantage of PDA is its recursive nature, allowing us to execute multiple " -"state transitions. They also include additional optimisation (for those who " -"are interested) to reduce grammar compilation overhead. Besides, you can " -"also find more details about guidance by yourself." -msgstr "" -"XGrammar 引入了一种通过下推自动机(PDA)进行批量约束解码的新技术。你可以把 PDA 理解为“有限状态机(FSM)的集合,每个 FSM " -"代表一个上下文无关文法(CFG)。” PDA 的一个重要优点是其递归特性,使我们能够执行多次状态转移。此外,PDA " -"还包含了额外的优化(供感兴趣的用户参考),以减少语法编译的开销。除此之外,你还可以自己找到更多关于指导的信息。" - -#: ../../user_guide/feature_guide/structured_output.md:19 -msgid "How to Use Structured Output?" -msgstr "如何使用结构化输出?" - -#: ../../user_guide/feature_guide/structured_output.md:21 -msgid "Online Inference" -msgstr "在线推理" - -#: ../../user_guide/feature_guide/structured_output.md:23 -msgid "" -"You can also generate structured outputs using the OpenAI's Completions and " -"Chat API. The following parameters are supported, which must be added as " -"extra parameters:" -msgstr "你也可以使用 OpenAI 的 Completions 和 Chat API 生成结构化输出。支持以下参数,这些参数必须作为额外参数添加:" - -#: ../../user_guide/feature_guide/structured_output.md:25 -msgid "`guided_choice`: the output will be exactly one of the choices." -msgstr "`guided_choice`:输出将会是其中一个选项。" - -#: ../../user_guide/feature_guide/structured_output.md:26 -msgid "`guided_regex`: the output will follow the regex pattern." -msgstr "`guided_regex`:输出将遵循正则表达式模式。" - -#: ../../user_guide/feature_guide/structured_output.md:27 -msgid "`guided_json`: the output will follow the JSON schema." -msgstr "`guided_json`:输出将遵循 JSON 架构。" - -#: ../../user_guide/feature_guide/structured_output.md:28 -msgid "`guided_grammar`: the output will follow the context free grammar." -msgstr "`guided_grammar`:输出将遵循上下文无关文法。" - -#: ../../user_guide/feature_guide/structured_output.md:30 -msgid "" -"Structured outputs are supported by default in the OpenAI-Compatible Server." 
-" You can choose to specify the backend to use by setting the `--guided-" -"decoding-backend` flag to vllm serve. The default backend is `auto`, which " -"will try to choose an appropriate backend based on the details of the " -"request. You may also choose a specific backend, along with some options." -msgstr "" -"OpenAI 兼容服务器默认支持结构化输出。你可以通过设置 `--guided-decoding-backend` 标志为 vllm serve " -"来指定要使用的后端。默认后端为 `auto`,它会根据请求的详细信息尝试选择合适的后端。你也可以选择特定的后端,并设置一些选项。" - -#: ../../user_guide/feature_guide/structured_output.md:32 -msgid "" -"Now let´s see an example for each of the cases, starting with the " -"guided_choice, as it´s the easiest one:" -msgstr "现在让我们来看每种情况的示例,首先是 guided_choice,因为它是最简单的:" - -#: ../../user_guide/feature_guide/structured_output.md:51 -msgid "" -"The next example shows how to use the guided_regex. The idea is to generate " -"an email address, given a simple regex template:" -msgstr "下一个例子展示了如何使用 guided_regex。其思路是基于一个简单的正则表达式模板生成一个电子邮件地址:" - -#: ../../user_guide/feature_guide/structured_output.md:67 -msgid "" -"One of the most relevant features in structured text generation is the " -"option to generate a valid JSON with pre-defined fields and formats. For " -"this we can use the guided_json parameter in two different ways:" -msgstr "" -"在结构化文本生成中,最相关的特性之一是能够生成具有预定义字段和格式的有效 JSON。为此,我们可以通过两种不同的方式使用 guided_json 参数:" - -#: ../../user_guide/feature_guide/structured_output.md:69 -msgid "Using a JSON Schema." -msgstr "使用 JSON 架构。" - -#: ../../user_guide/feature_guide/structured_output.md:70 -msgid "Defining a Pydantic model and then extracting the JSON Schema from it." -msgstr "定义一个 Pydantic 模型,然后从中提取 JSON Schema。" - -#: ../../user_guide/feature_guide/structured_output.md:72 -msgid "" -"The next example shows how to use the guided_json parameter with a Pydantic " -"model:" -msgstr "下一个示例展示了如何将 guided_json 参数与 Pydantic 模型一起使用:" - -#: ../../user_guide/feature_guide/structured_output.md:104 -msgid "" -"Finally we have the guided_grammar option, which is probably the most " -"difficult to use, but it´s really powerful. It allows us to define complete " -"languages like SQL queries. It works by using a context free EBNF grammar. " -"As an example, we can use to define a specific format of simplified SQL " -"queries:" -msgstr "" -"最后,我们有 guided_grammar 选项,这可能是最难使用的,但它非常强大。它允许我们定义完整的语言,比如 SQL 查询。它通过使用上下文无关的" -" EBNF 语法来实现。例如,我们可以用它来定义一种简化 SQL 查询的特定格式:" - -#: ../../user_guide/feature_guide/structured_output.md:134 -msgid "" -"Find more examples [here](https://github.com/vllm-" -"project/vllm/blob/main/examples/offline_inference/structured_outputs.py)." -msgstr "" -"在[这里](https://github.com/vllm-" -"project/vllm/blob/main/examples/offline_inference/structured_outputs.py)可以找到更多示例。" - -#: ../../user_guide/feature_guide/structured_output.md:136 -msgid "Offline Inference" -msgstr "离线推理" - -#: ../../user_guide/feature_guide/structured_output.md:138 -msgid "" -"To use Structured Output, we'll need to configure the guided decoding using " -"the class `GuidedDecodingParams` inside `SamplingParams`. 
The main available" -" options inside `GuidedDecodingParams` are:" -msgstr "" -"要使用结构化输出,我们需要在 `SamplingParams` 内通过 `GuidedDecodingParams` " -"类配置引导解码。`GuidedDecodingParams` 中主要可用的选项有:" - -#: ../../user_guide/feature_guide/structured_output.md:140 -msgid "json" -msgstr "json" - -#: ../../user_guide/feature_guide/structured_output.md:141 -msgid "regex" -msgstr "正则表达式" - -#: ../../user_guide/feature_guide/structured_output.md:142 -msgid "choice" -msgstr "选择" - -#: ../../user_guide/feature_guide/structured_output.md:143 -msgid "grammar" -msgstr "语法" - -#: ../../user_guide/feature_guide/structured_output.md:145 -msgid "One example for the usage of the choice parameter is shown below:" -msgstr "choice 参数用法的一个示例如下:" - -#: ../../user_guide/feature_guide/structured_output.md:163 -msgid "" -"Find more examples of other usages [here](https://github.com/vllm-" -"project/vllm/blob/main/examples/offline_inference/structured_outputs.py)." -msgstr "" -"查看更多其他用法的示例 [在这里](https://github.com/vllm-" -"project/vllm/blob/main/examples/offline_inference/structured_outputs.py)。" +"Find more examples and explanations about these usages in [vLLM official " +"document](https://docs.vllm.ai/en/stable/features/structured_outputs/)." +msgstr "更多关于这些用法的示例和解释,请参阅 [vLLM 官方文档](https://docs.vllm.ai/en/stable/features/structured_outputs/)。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po new file mode 100644 index 00000000..2c8d4a9a --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po @@ -0,0 +1,219 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:1 +msgid "UCM-Enhanced Prefix Caching Deployment Guide" +msgstr "UCM增强前缀缓存部署指南" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:3 +msgid "Overview" +msgstr "概述" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:5 +msgid "" +"Unified Cache Management (UCM) provides an external KV-cache storage " +"layer designed for prefix-caching scenarios in vLLM/vLLM-Ascend. Unlike " +"KV Pooling, which expands prefix-cache capacity only by aggregating " +"device memory and therefore remains limited by HBM/DRAM size and lacks " +"persistence, UCM decouples compute from storage and adopts a tiered " +"design. Each node uses local DRAM as a fast cache, while a shared " +"backend—such as 3FS or enterprise-grade storage—serves as the persistent " +"KV store. This approach removes the capacity ceiling imposed by device " +"memory, enables durable and reliable prefix caching, and allows cache " +"capacity to scale with the storage system rather than with compute " +"resources." 
+msgstr "" +"统一缓存管理(UCM)为vLLM/vLLM-Ascend中的前缀缓存场景提供了一个外部的KV缓存存储层。与仅通过聚合设备内存来扩展前缀缓存容量、因此仍受限于HBM/DRAM大小且缺乏持久性的KV池化不同,UCM将计算与存储解耦,并采用分层设计。每个节点使用本地DRAM作为快速缓存,而共享后端(如3FS或企业级存储)则作为持久化的KV存储。这种方法消除了设备内存带来的容量上限,实现了持久可靠的前缀缓存,并使缓存容量能够随存储系统而非计算资源扩展。" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:7 +msgid "Prerequisites" +msgstr "先决条件" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:9 +msgid "OS: Linux" +msgstr "操作系统:Linux" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:10 +msgid "Hardware with Ascend NPUs. It's usually the Atlas 800 A2 series." +msgstr "配备昇腾NPU的硬件。通常是Atlas 800 A2系列。" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:11 +msgid "**vLLM: main branch**" +msgstr "**vLLM:main分支**" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:12 +msgid "**vLLM Ascend: main branch**" +msgstr "**vLLM Ascend:main分支**" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:14 +msgid "UCM Installation" +msgstr "UCM安装" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:16 +msgid "" +"**Please refer to the [official UCM installation guide for Ascend " +"NPU](https://ucm.readthedocs.io/en/latest/getting-" +"started/quickstart_vllm_ascend.html)**" +msgstr "" +"**请参考[昇腾NPU的官方UCM安装指南](https://ucm.readthedocs.io/en/latest/getting-started/quickstart_vllm_ascend.html)**" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:18 +msgid "Configure UCM for Prefix Caching" +msgstr "为前缀缓存配置UCM" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:20 +msgid "" +"Modify the UCM configuration file to specify which UCM connector to use " +"and where KV blocks should be stored. You may directly edit the example " +"file at:" +msgstr "修改UCM配置文件以指定使用哪个UCM连接器以及KV块应存储在何处。您可以直接编辑位于以下路径的示例文件:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:23 +msgid "`unified-cache-management/examples/ucm_config_example.yaml`" +msgstr "`unified-cache-management/examples/ucm_config_example.yaml`" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:25 +msgid "" +"**For updated configuration options, please refer to the [official UCM " +"documentation for prefix-caching](https://ucm.readthedocs.io/en/latest" +"/user-guide/prefix-cache/nfs_store.html)**" +msgstr "" +"**有关最新的配置选项,请参考[前缀缓存的官方UCM文档](https://ucm.readthedocs.io/en/latest/user-guide/prefix-cache/nfs_store.html)**" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:27 +msgid "A minimal configuration looks like this:" +msgstr "一个最小配置示例如下:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:39 +msgid "Explanation:" +msgstr "说明:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:41 +msgid "" +"ucm_connector_name: \"UcmNfsStore\": Specifies `UcmNfsStore` as the UCM " +"connector." +msgstr "ucm_connector_name: \"UcmNfsStore\":指定`UcmNfsStore`作为UCM连接器。" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:44 +msgid "" +"storage_backends: Specify the directory used for storing KV blocks. It " +"can be a local directory or an NFS-mounted path. UCM will store KV blocks" +" here. **⚠️ Make sure to replace `\"/mnt/test\"` with your actual " +"storage directory.**" +msgstr "" +"storage_backends:指定用于存储KV块的目录。它可以是本地目录或NFS挂载路径。UCM将在此处存储KV块。**⚠️ 请确保将`\"/mnt/test\"`替换为您的实际存储目录。**" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:48 +msgid "use_direct: Whether to enable direct I/O (optional). Default is `false`." 
+msgstr "use_direct:是否启用直接I/O(可选)。默认为`false`。" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:51 +msgid "" +"load_only_first_rank: Controls whether only rank 0 loads KV cache and " +"broadcasts it to other ranks. This feature is currently not supported " +"on Ascend, so it must be set to `false` (all ranks load/dump " +"independently)." +msgstr "" +"load_only_first_rank:控制是否仅rank 0加载KV缓存并将其广播到其他rank。此功能目前在昇腾上不受支持,因此必须设置为`false`(所有rank独立加载/转储)。" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:55 +msgid "Launching Inference" +msgstr "启动推理" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:57 +msgid "" +"In this guide, we describe **online inference** using vLLM with the UCM " +"connector, deployed as an OpenAI-compatible server. For best performance " +"with UCM, it is recommended to set `block_size` to 128." +msgstr "在本指南中,我们描述使用带有UCM连接器的vLLM进行**在线推理**,部署为OpenAI兼容的服务器。为了获得UCM的最佳性能,建议将`block_size`设置为128。" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:59 +msgid "To start the vLLM server with the Qwen/Qwen2.5-14B-Instruct model, run:" +msgstr "要使用Qwen/Qwen2.5-14B-Instruct模型启动vLLM服务器,请运行:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:79 +msgid "" +"**⚠️ Make sure to replace `\"/vllm-workspace/unified-cache-" +"management/examples/ucm_config_example.yaml\"` with your actual config " +"file path.**" +msgstr "**⚠️ 请确保将`\"/vllm-workspace/unified-cache-management/examples/ucm_config_example.yaml\"`替换为您的实际配置文件路径。**" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:81 +msgid "If you see the log below:" +msgstr "如果您看到以下日志:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:89 +msgid "" +"Congratulations, you have successfully started the vLLM server with UCM " +"connector!" +msgstr "恭喜,您已成功启动带有UCM连接器的vLLM服务器!" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:91 +msgid "Evaluating UCM Prefix Caching Performance" +msgstr "评估UCM前缀缓存性能" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:93 +msgid "" +"After launching the vLLM server with `UCMConnector` enabled, the easiest " +"way to observe the prefix caching effect is to run the built-in `vllm " +"bench` CLI. Executing the following command **twice** in a separate " +"terminal shows the improvement clearly." +msgstr "在启用`UCMConnector`启动vLLM服务器后,观察前缀缓存效果的最简单方法是运行内置的`vllm bench` CLI。在单独的终端中**两次**执行以下命令可以清晰地展示改进效果。" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:112 +msgid "After the first execution" +msgstr "第一次执行后" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:114 +msgid "The `vllm bench` terminal prints the benchmark result:" +msgstr "`vllm bench`终端打印基准测试结果:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:121 +msgid "Inspecting the vLLM server logs reveals entries like:" +msgstr "检查vLLM服务器日志会发现类似条目:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:127 +msgid "" +"This indicates that for the first inference request, UCM did not hit any " +"cached KV blocks. As a result, the full 16K-token prefill must be " +"computed, leading to a relatively large TTFT." 
+msgstr "这表明对于第一个推理请求,UCM未命中任何缓存的KV块。因此,必须计算完整的16K令牌预填充,导致相对较大的TTFT。" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:129 +msgid "After the second execution" +msgstr "第二次执行后" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:131 +msgid "Running the same benchmark again produces:" +msgstr "再次运行相同的基准测试会产生:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:138 +msgid "The vLLM server logs now contain similar entries:" +msgstr "vLLM服务器日志现在包含类似条目:" + +#: ../../source/user_guide/feature_guide/ucm_deployment.md:144 +msgid "" +"This indicates that during the second request, UCM successfully retrieved" +" all 125 cached KV blocks from the storage backend. Leveraging the fully " +"cached prefix significantly reduces the initial latency observed by the " +"model, yielding an approximate **8× improvement in TTFT** compared to the" +" initial run." +msgstr "这表明在第二次请求期间,UCM成功从存储后端检索了全部125个缓存的KV块。利用完全缓存的前缀显著减少了模型观察到的初始延迟,与首次运行相比,TTFT实现了约**8倍的提升**。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/weight_prefetch.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/weight_prefetch.po new file mode 100644 index 00000000..7a2da4b6 --- /dev/null +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/weight_prefetch.po @@ -0,0 +1,171 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2025, vllm-ascend team +# This file is distributed under the same license as the vllm-ascend +# package. +# FIRST AUTHOR , 2026. +# +msgid "" +msgstr "" +"Project-Id-Version: vllm-ascend \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 2.18.0\n" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:1 +msgid "Weight Prefetch Guide" +msgstr "权重预取指南" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:3 +msgid "" +"Weight prefetching optimizes memory usage by preloading weights into the " +"cache before they are needed, minimizing delays caused by memory access " +"during model execution. Linear layers sometimes exhibit relatively high " +"MTE utilization. To address this, we create a separate pipeline " +"specifically for weight prefetching, which runs in parallel with the " +"original vector computation pipeline, such as quantize, MoE gating top_k," +" RMSNorm and SwiGlu. This approach allows the weights to be preloaded to " +"L2 cache ahead of time, reducing MTE utilization during the linear layer " +"computations and indirectly improving Cube computation efficiency by " +"minimizing resource contention and optimizing data flow." +msgstr "" +"权重预取通过在需要之前将权重预加载到缓存中来优化内存使用,从而最小化模型执行期间因内存访问造成的延迟。线性层有时表现出相对较高的MTE利用率。为了解决这个问题,我们创建了一个专门用于权重预取的独立流水线,该流水线与原始向量计算流水线(如量化、MoE门控top_k、RMSNorm和SwiGlu)并行运行。这种方法允许权重提前预加载到L2缓存中,减少线性层计算期间的MTE利用率,并通过最小化资源争用和优化数据流间接提高Cube计算效率。" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:5 +msgid "" +"Since we use vector computations to hide the weight prefetching pipeline," +" this has an effect on computation. If you prioritize low latency over " +"high throughput, it is best not to enable prefetching." 
+msgstr "" +"由于我们使用向量计算来隐藏权重预取流水线,这会对计算产生影响。如果您优先考虑低延迟而非高吞吐量,最好不要启用预取。" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:7 +msgid "Quick Start" +msgstr "快速开始" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:9 +#, python-brace-format +msgid "" +"With `--additional-config '{\"weight_prefetch_config\": {\"enabled\": " +"true}}'` to open weight prefetch." +msgstr "" +"使用 `--additional-config '{\"weight_prefetch_config\": {\"enabled\": " +"true}}'` 来开启权重预取。" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:11 +msgid "Fine-tune Prefetch Ratio" +msgstr "微调预取比例" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:13 +msgid "" +"Since weight prefetch use vector computations to hide the weight " +"prefetching pipeline, the setting of the prefetch size is crucial. If the" +" size is too small, the optimization benefits will not be fully realized," +" while a larger size may lead to resource contention, resulting in " +"performance degradation. To accommodate different scenarios, we have " +"added `prefetch_ratio` to allow for flexible size configuration based on " +"the specific workload, details as follows:" +msgstr "" +"由于权重预取使用向量计算来隐藏权重预取流水线,预取大小的设置至关重要。如果大小太小,则无法充分发挥优化优势;而较大的大小可能导致资源争用,从而导致性能下降。为了适应不同的场景,我们添加了`prefetch_ratio`,允许根据具体工作负载灵活配置大小,详情如下:" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:15 +msgid "" +"With `prefetch_ratio` in `\"weight_prefetch_config\"` to custom the " +"weight prefetch ratio for specific linear layers." +msgstr "" +"使用`\"weight_prefetch_config\"`中的`prefetch_ratio`来为特定的线性层自定义权重预取比例。" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:17 +msgid "" +"The “attn” and “moe” configuration options are used for MoE model, " +"details as follows:" +msgstr "" +"“attn”和“moe”配置选项用于MoE模型,详情如下:" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:19 +#, python-brace-format +msgid "`\"attn\": { \"qkv\": 1.0, \"o\": 1.0}, \"moe\": {\"gate_up\": 0.8}`" +msgstr "`\"attn\": { \"qkv\": 1.0, \"o\": 1.0}, \"moe\": {\"gate_up\": 0.8}`" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:21 +msgid "" +"The “mlp” configuration option is used to optimize the performance of the" +" Dense model, details as follows:" +msgstr "" +"“mlp”配置选项用于优化Dense模型的性能,详情如下:" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:23 +#, python-brace-format +msgid "`\"mlp\": {\"gate_up\": 1.0, \"down\": 1.0}`" +msgstr "`\"mlp\": {\"gate_up\": 1.0, \"down\": 1.0}`" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:25 +msgid "" +"Above value are the default config, the default value has a good " +"performance for Qwen3-235B-A22B-W8A8 when `--max-num-seqs` is 144, for " +"Qwen3-32B-W8A8 when `--max-num-seqs` is 72." +msgstr "" +"以上值为默认配置,当`--max-num-seqs`为144时,该默认值对Qwen3-235B-A22B-W8A8有良好性能;当`--max-num-seqs`为72时,对Qwen3-32B-W8A8有良好性能。" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:27 +msgid "" +"However, this may not be the optimal configuration for your scenario. For" +" higher concurrency, you can try increasing the prefetch size. For lower " +"concurrency, prefetching may not offer any advantages, so you can " +"decrease the size or disable prefetching. Determine if the prefetch size " +"is appropriate by collecting profiling data. 
Specifically, check if the " +"time required for the prefetch operation (e.g., MLP Down Proj weight " +"prefetching) overlaps with the time required for parallel vector " +"computation operators (e.g., SwiGlu computation), and whether the " +"prefetch operation is no later than the completion time of the vector " +"computation operator. In the profiling timeline, a prefetch operation " +"appears as a CMO operation on a single stream; this CMO operation is the " +"prefetch operation." +msgstr "" +"然而,这可能不是您场景下的最优配置。对于更高的并发度,可以尝试增加预取大小。对于较低的并发度,预取可能不会带来任何优势,因此可以减少大小或禁用预取。通过收集性能分析数据来确定预取大小是否合适。具体来说,检查预取操作(例如,MLP Down Proj权重预取)所需的时间是否与并行向量计算算子(例如,SwiGlu计算)所需的时间重叠,以及预取操作是否不晚于向量计算算子的完成时间。在性能分析时间线中,预取操作显示为单个流上的CMO操作;此CMO操作即为预取操作。" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:29 +msgid "Notes:" +msgstr "注意:" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:31 +msgid "" +"Weight prefetch of MLP `down` project prefetch depends on sequence " +"parallel, if you want to open for mlp `down` please also enable sequence " +"parallel." +msgstr "" +"MLP `down`投影的权重预取依赖于序列并行,如果您想为mlp `down`开启预取,请同时启用序列并行。" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:32 +msgid "" +"Due to the current size of the L2 cache, the maximum prefetch cannot " +"exceed 18MB. If `prefetch_ratio * linear_layer_weight_size >= 18 * 1024 *" +" 1024` bytes, the backend will only prefetch 18MB." +msgstr "" +"由于当前L2缓存的大小,最大预取量不能超过18MB。如果`prefetch_ratio * linear_layer_weight_size >= 18 * 1024 * 1024`字节,后端将只预取18MB。" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:34 +msgid "Example" +msgstr "示例" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:36 +msgid "For MoE model:" +msgstr "对于MoE模型:" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:56 +msgid "For dense model:" +msgstr "对于Dense模型:" + +#: ../../source/user_guide/feature_guide/weight_prefetch.md:58 +msgid "" +"Following is the default configuration that can get a good performance " +"for `--max-num-seqs` is 72 for Qwen3-32B-W8A8" +msgstr "" +"以下是默认配置,当`--max-num-seqs`为72时,该配置可为Qwen3-32B-W8A8带来良好性能" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po index bafbce5a..50585e3b 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po @@ -1,1660 +1,7206 @@ -# Translations template for PROJECT. +# Chinese translations for PROJECT. # Copyright (C) 2025 ORGANIZATION # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: 2025-07-18 10:11+0800\n" "Last-Translator: \n" -"Language-Team: \n" "Language: zh\n" +"Language-Team: \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.17.0\n" -"X-Generator: Poedit 3.5\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/release_notes.md:1 -msgid "Release note" +#: ../../source/user_guide/release_notes.md:1 +msgid "Release Notes" msgstr "版本说明" -#: ../../user_guide/release_notes.md:3 -msgid "v0.9.2rc1 - 2025.07.11" -msgstr "" +#: ../../source/user_guide/release_notes.md:3 +msgid "v0.17.0rc1 - 2026.03.15" +msgstr "v0.17.0rc1 - 2026.03.15" -#: ../../user_guide/release_notes.md:5 +#: ../../source/user_guide/release_notes.md:5 msgid "" -"This is the 1st release candidate of v0.9.2 for vLLM Ascend. Please follow " -"the [official doc](https://vllm-ascend.readthedocs.io/en/) to get started. " -"From this release, V1 engine will be enabled by default, there is no need " -"to set `VLLM_USE_V1=1` any more. And this release is the last version to " -"support V0 engine, V0 code will be clean up in the future." +"This is the first release candidate of v0.17.0 for vLLM Ascend. Please " +"follow the [official doc](https://docs.vllm.ai/projects/ascend/en/latest)" +" to get started." msgstr "" -"这是 vLLM Ascend v0.9.2 的第一个候选发布版本。请参阅[官方文档](https://vllm-" -"ascend.readthedocs.io/en/)开始使用。从本次发布起,V1 引擎将默认启用,不再需" -"要设置 `VLLM_USE_V1=1`。此外,该版本也是最后一个支持 V0 引擎的版本,V0 相关" -"代码将在未来被清理。" +"这是 vLLM Ascend v0.17.0 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" -#: ../../user_guide/release_notes.md:7 ../../user_guide/release_notes.md:34 -#: ../../user_guide/release_notes.md:70 ../../user_guide/release_notes.md:78 -#: ../../user_guide/release_notes.md:116 ../../user_guide/release_notes.md:140 -#: ../../user_guide/release_notes.md:163 ../../user_guide/release_notes.md:186 -#: ../../user_guide/release_notes.md:206 ../../user_guide/release_notes.md:231 -#: ../../user_guide/release_notes.md:253 ../../user_guide/release_notes.md:285 +#: ../../source/user_guide/release_notes.md:7 +#: ../../source/user_guide/release_notes.md:61 +#: ../../source/user_guide/release_notes.md:152 +#: ../../source/user_guide/release_notes.md:241 +#: ../../source/user_guide/release_notes.md:354 +#: ../../source/user_guide/release_notes.md:403 +#: ../../source/user_guide/release_notes.md:458 +#: ../../source/user_guide/release_notes.md:515 +#: ../../source/user_guide/release_notes.md:553 +#: ../../source/user_guide/release_notes.md:604 +#: ../../source/user_guide/release_notes.md:623 +#: ../../source/user_guide/release_notes.md:657 +#: ../../source/user_guide/release_notes.md:692 +#: ../../source/user_guide/release_notes.md:716 +#: ../../source/user_guide/release_notes.md:759 +#: ../../source/user_guide/release_notes.md:815 +#: ../../source/user_guide/release_notes.md:889 +#: ../../source/user_guide/release_notes.md:948 +#: ../../source/user_guide/release_notes.md:1060 +#: ../../source/user_guide/release_notes.md:1164 +#: ../../source/user_guide/release_notes.md:1172 +#: ../../source/user_guide/release_notes.md:1210 +#: ../../source/user_guide/release_notes.md:1234 +#: ../../source/user_guide/release_notes.md:1261 +#: ../../source/user_guide/release_notes.md:1287 +#: 
../../source/user_guide/release_notes.md:1310 +#: ../../source/user_guide/release_notes.md:1336 +#: ../../source/user_guide/release_notes.md:1362 +#: ../../source/user_guide/release_notes.md:1399 msgid "Highlights" msgstr "亮点" -#: ../../user_guide/release_notes.md:8 +#: ../../source/user_guide/release_notes.md:9 msgid "" -"Pooling model works with V1 engine now. You can take a try with Qwen3 " -"embedding model [#1359](https://github.com/vllm-project/vllm-ascend/" -"pull/1359)." +"Ascend950 chip is now supported. [#7151](https://github.com/vllm-project" +"/vllm-ascend/pull/7151)" msgstr "" -"Pooling 模型现在可以与 V1 引擎一起使用。你可以尝试使用 Qwen3 embedding 模型 " -"[#1359](https://github.com/vllm-project/vllm-ascend/pull/1359)。" +"现已支持 Ascend950 芯片。 [#7151](https://github.com/vllm-project/vllm-ascend/pull/7151)" -#: ../../user_guide/release_notes.md:9 +#: ../../source/user_guide/release_notes.md:10 msgid "" -"The performance on Atlas 300I series has been improved. [#1591](https://" -"github.com/vllm-project/vllm-ascend/pull/1591)" +"ACLGraph (graph mode) is now supported for Model Runner V2. " +"[#7110](https://github.com/vllm-project/vllm-ascend/pull/7110)" msgstr "" -"Atlas 300I 系列的性能已经提升。 [#1591](https://github.com/vllm-project/" -"vllm-ascend/pull/1591)" +"Model Runner V2 现已支持 ACLGraph(图模式)。 [#7110](https://github.com/vllm-project/vllm-ascend/pull/7110)" -#: ../../user_guide/release_notes.md:10 +#: ../../source/user_guide/release_notes.md:11 msgid "" -"aclgraph mode works with Moe models now. Currently, only Qwen3 Moe is well " -"tested. [#1381](https://github.com/vllm-project/vllm-ascend/pull/1381)" +"Unified parallelized speculative decoding is supported, enabling parallel" +" draft inference schemes simultaneously. [#6766](https://github.com/vllm-" +"project/vllm-ascend/pull/6766)" msgstr "" -"aclgraph 模式现在可以与 Moe 模型一起使用。目前,仅对 Qwen3 Moe 进行了充分测" -"试。[#1381](https://github.com/vllm-project/vllm-ascend/pull/1381)" +"现已支持统一的并行化推测解码,可同时启用并行草稿推理方案。 [#6766](https://github.com/vllm-project/vllm-ascend/pull/6766)" -#: ../../user_guide/release_notes.md:12 ../../user_guide/release_notes.md:39 -#: ../../user_guide/release_notes.md:83 ../../user_guide/release_notes.md:146 -#: ../../user_guide/release_notes.md:168 ../../user_guide/release_notes.md:191 -#: ../../user_guide/release_notes.md:212 ../../user_guide/release_notes.md:236 -#: ../../user_guide/release_notes.md:258 ../../user_guide/release_notes.md:291 +#: ../../source/user_guide/release_notes.md:13 +#: ../../source/user_guide/release_notes.md:66 +#: ../../source/user_guide/release_notes.md:159 +#: ../../source/user_guide/release_notes.md:266 +#: ../../source/user_guide/release_notes.md:361 +#: ../../source/user_guide/release_notes.md:407 +#: ../../source/user_guide/release_notes.md:464 +msgid "Features" +msgstr "新功能" + +#: ../../source/user_guide/release_notes.md:15 +msgid "" +"Auto-detect quantization format from model files, and remote model IDs " +"(e.g., `org/model-name`) are also supported. `--quantization ascend` is " +"not required now. [#7111](https://github.com/vllm-project/vllm-" +"ascend/pull/7111)" +msgstr "" +"支持从模型文件自动检测量化格式,同时也支持远程模型 ID(例如 `org/model-name`)。现在不再需要 `--quantization ascend` 参数。 [#7111](https://github.com/vllm-project/vllm-ascend/pull/7111)" + +#: ../../source/user_guide/release_notes.md:16 +msgid "Qwen3.5 is supported from this version on." 
+msgstr "从本版本起支持 Qwen3.5。" + +#: ../../source/user_guide/release_notes.md:17 +msgid "" +"FlashLB algorithm for EPLB: supports per-step heat collection and multi-" +"stage load balancing for better expert parallelism efficiency. " +"[#6477](https://github.com/vllm-project/vllm-ascend/pull/6477)" +msgstr "" +"用于 EPLB 的 FlashLB 算法:支持每步热度收集和多阶段负载均衡,以提高专家并行效率。 [#6477](https://github.com/vllm-project/vllm-ascend/pull/6477)" + +#: ../../source/user_guide/release_notes.md:18 +msgid "" +"LoRA with tensor parallel and `--fully-sharded-loras` is now fixed and " +"working. [#6650](https://github.com/vllm-project/vllm-ascend/pull/6650)" +msgstr "" +"已修复并支持了结合张量并行和 `--fully-sharded-loras` 的 LoRA。 [#6650](https://github.com/vllm-project/vllm-ascend/pull/6650)" + +#: ../../source/user_guide/release_notes.md:19 +msgid "" +"LMCacheAscendConnector is added as a new KV cache pooling solution for " +"Ascend. [#6882](https://github.com/vllm-project/vllm-ascend/pull/6882)" +msgstr "" +"新增 LMCacheAscendConnector 作为 Ascend 的新 KV 缓存池化解决方案。 [#6882](https://github.com/vllm-project/vllm-ascend/pull/6882)" + +#: ../../source/user_guide/release_notes.md:20 +msgid "" +"W8A8C8 quantization is now supported for DeepSeek-V3.2 in PD-mix " +"scenario. [#7029](https://github.com/vllm-project/vllm-ascend/pull/7029)" +msgstr "" +"现已在 PD-mix 场景下支持 DeepSeek-V3.2 的 W8A8C8 量化。 [#7029](https://github.com/vllm-project/vllm-ascend/pull/7029)" + +#: ../../source/user_guide/release_notes.md:21 +msgid "" +"[Experimental] Minimax-m2.5 model is now supported on Ascend NPU. " +"[#7105](https://github.com/vllm-project/vllm-ascend/pull/7105)" +msgstr "" +"[实验性] 现已在 Ascend NPU 上支持 Minimax-m2.5 模型。 [#7105](https://github.com/vllm-project/vllm-ascend/pull/7105)" + +#: ../../source/user_guide/release_notes.md:22 +msgid "" +"[Experimental] Mooncake Layerwise Connector now supports hybrid attention" +" manager with multiple KV cache groups. [#7022](https://github.com/vllm-" +"project/vllm-ascend/pull/7022)" +msgstr "" +"[实验性] Mooncake Layerwise Connector 现在支持具有多个 KV 缓存组的混合注意力管理器。 [#7022](https://github.com/vllm-project/vllm-ascend/pull/7022)" + +#: ../../source/user_guide/release_notes.md:23 +msgid "" +"[Experimental] Prefix cache is now supported in hybrid model. " +"[#7103](https://github.com/vllm-project/vllm-ascend/pull/7103)" +msgstr "" +"[实验性] 混合模型现已支持前缀缓存。 [#7103](https://github.com/vllm-project/vllm-ascend/pull/7103)" + +#: ../../source/user_guide/release_notes.md:25 +#: ../../source/user_guide/release_notes.md:83 +#: ../../source/user_guide/release_notes.md:187 +#: ../../source/user_guide/release_notes.md:286 +#: ../../source/user_guide/release_notes.md:367 +#: ../../source/user_guide/release_notes.md:422 +#: ../../source/user_guide/release_notes.md:474 +msgid "Performance" +msgstr "性能" + +#: ../../source/user_guide/release_notes.md:27 +msgid "" +"Pipeline Parallel now supports async scheduling, improving throughput for" +" PP deployments. [#7136](https://github.com/vllm-project/vllm-" +"ascend/pull/7136)" +msgstr "" +"流水线并行现在支持异步调度,提高了 PP 部署的吞吐量。 [#7136](https://github.com/vllm-project/vllm-ascend/pull/7136)" + +#: ../../source/user_guide/release_notes.md:28 +msgid "" +"Improved TTFT when using Mooncake connector by reducing log overhead. 
" +"[#6125](https://github.com/vllm-project/vllm-ascend/pull/6125)" +msgstr "" +"通过减少日志开销,改善了使用 Mooncake connector 时的首词元延迟。 [#6125](https://github.com/vllm-project/vllm-ascend/pull/6125)" + +#: ../../source/user_guide/release_notes.md:29 +msgid "" +"KV Pool lookup is optimized for short sequences (token length < " +"block_size). [#7146](https://github.com/vllm-project/vllm-" +"ascend/pull/7146)" +msgstr "" +"针对短序列(token 长度 < block_size)优化了 KV 池查找。 [#7146](https://github.com/vllm-project/vllm-ascend/pull/7146)" + +#: ../../source/user_guide/release_notes.md:30 +msgid "" +"Fix penalty ops in Model Runner V2, achieving ~10% performance " +"improvement. [#7013](https://github.com/vllm-project/vllm-" +"ascend/pull/7013)" +msgstr "" +"修复了 Model Runner V2 中的惩罚操作,实现了约 10% 的性能提升。 [#7013](https://github.com/vllm-project/vllm-ascend/pull/7013)" + +#: ../../source/user_guide/release_notes.md:32 +#: ../../source/user_guide/release_notes.md:101 +#: ../../source/user_guide/release_notes.md:209 +#: ../../source/user_guide/release_notes.md:326 +msgid "Documentation" +msgstr "文档" + +#: ../../source/user_guide/release_notes.md:34 +msgid "" +"Added EPD (Encode-Prefill-Decode) documentation and load-balance proxy " +"example. [#6221](https://github.com/vllm-project/vllm-ascend/pull/6221)" +msgstr "" +"新增了 EPD(编码-预填充-解码)文档和负载均衡代理示例。 [#6221](https://github.com/vllm-project/vllm-ascend/pull/6221)" + +#: ../../source/user_guide/release_notes.md:35 +msgid "" +"Added Ascend PyTorch Profiler usage guide. [#7117](https://github.com" +"/vllm-project/vllm-ascend/pull/7117)" +msgstr "" +"新增了 Ascend PyTorch Profiler 使用指南。 [#7117](https://github.com/vllm-project/vllm-ascend/pull/7117)" + +#: ../../source/user_guide/release_notes.md:36 +msgid "" +"Fixed DSV3.1 PD configuration documentation. [#7187](https://github.com" +"/vllm-project/vllm-ascend/pull/7187)" +msgstr "" +"修复了 DSV3.1 PD 配置文档。 [#7187](https://github.com/vllm-project/vllm-ascend/pull/7187)" + +#: ../../source/user_guide/release_notes.md:38 +#: ../../source/user_guide/release_notes.md:109 +#: ../../source/user_guide/release_notes.md:220 +#: ../../source/user_guide/release_notes.md:332 +#: ../../source/user_guide/release_notes.md:377 +#: ../../source/user_guide/release_notes.md:427 +#: ../../source/user_guide/release_notes.md:705 +#: ../../source/user_guide/release_notes.md:729 +#: ../../source/user_guide/release_notes.md:773 +#: ../../source/user_guide/release_notes.md:904 +#: ../../source/user_guide/release_notes.md:1074 +#: ../../source/user_guide/release_notes.md:1193 +#: ../../source/user_guide/release_notes.md:1250 +#: ../../source/user_guide/release_notes.md:1277 +#: ../../source/user_guide/release_notes.md:1298 +#: ../../source/user_guide/release_notes.md:1323 +#: ../../source/user_guide/release_notes.md:1350 +#: ../../source/user_guide/release_notes.md:1378 +#: ../../source/user_guide/release_notes.md:1410 +msgid "Others" +msgstr "其他" + +#: ../../source/user_guide/release_notes.md:40 +msgid "" +"Fix drafter crash in full graph mode for speculative decoding. " +"[#7158](https://github.com/vllm-project/vllm-ascend/pull/7158) " +"[#7148](https://github.com/vllm-project/vllm-ascend/pull/7148)" +msgstr "" +"修复了推测解码在全图模式下草稿器崩溃的问题。 [#7158](https://github.com/vllm-project/vllm-ascend/pull/7158) [#7148](https://github.com/vllm-project/vllm-ascend/pull/7148)" + +#: ../../source/user_guide/release_notes.md:41 +msgid "" +"Fix GLM5-W8A8 precision issues caused by rotary quant MTP weights. 
" +"[#7139](https://github.com/vllm-project/vllm-ascend/pull/7139)" +msgstr "" +"修复了由旋转量化 MTP 权重引起的 GLM5-W8A8 精度问题。 [#7139](https://github.com/vllm-project/vllm-ascend/pull/7139)" + +#: ../../source/user_guide/release_notes.md:42 +msgid "" +"Fix ngram graph replay accuracy error on 310P. [#7134](https://github.com" +"/vllm-project/vllm-ascend/pull/7134)" +msgstr "" +"修复了 310P 上 ngram 图重放的精度错误。 [#7134](https://github.com/vllm-project/vllm-ascend/pull/7134)" + +#: ../../source/user_guide/release_notes.md:43 +msgid "" +"Fix FIA pad logic in graph mode after upstream vLLM change. " +"[#7144](https://github.com/vllm-project/vllm-ascend/pull/7144)" +msgstr "" +"在上游 vLLM 变更后,修复了图模式下的 FIA 填充逻辑。 [#7144](https://github.com/vllm-project/vllm-ascend/pull/7144)" + +#: ../../source/user_guide/release_notes.md:44 +msgid "" +"Fix a precision issue caused by wrong KV cache reshape on Qwen3.5. " +"[#7209](https://github.com/vllm-project/vllm-ascend/pull/7209)" +msgstr "" +"修复了 Qwen3.5 上因 KV 缓存重塑错误导致的精度问题。 [#7209](https://github.com/vllm-project/vllm-ascend/pull/7209)" + +#: ../../source/user_guide/release_notes.md:45 +msgid "" +"Fix extra processes spawned on rank0 device. [#7107](https://github.com" +"/vllm-project/vllm-ascend/pull/7107)" +msgstr "" +"修复了 rank0 设备上产生的额外进程问题。[#7107](https://github.com/vllm-project/vllm-" +"ascend/pull/7107)" + +#: ../../source/user_guide/release_notes.md:46 +msgid "" +"Graph capture failures now properly raise exceptions for easier " +"debugging. [#5644](https://github.com/vllm-project/vllm-ascend/pull/5644)" +msgstr "" +"图捕获失败现在会正确抛出异常,便于调试。[#5644](https://github.com/vllm-project/vllm-" +"ascend/pull/5644)" + +#: ../../source/user_guide/release_notes.md:47 +msgid "" +"Fix Qwen3.5 model by replacing torch_npu.npu_recurrent_gated_delta_rule " +"by fused_recurrent_gated_delta_rule. [#7109](https://github.com/vllm-" +"project/vllm-ascend/pull/7109)" +msgstr "" +"通过将 torch_npu.npu_recurrent_gated_delta_rule 替换为 " +"fused_recurrent_gated_delta_rule 来修复 Qwen3.5 模型。[#7109](https://github.com/vllm-" +"project/vllm-ascend/pull/7109)" + +#: ../../source/user_guide/release_notes.md:48 +msgid "" +"Fix the bug when running Qwen3-Reranker-0.6B with LoRA. " +"[#7156](https://github.com/vllm-project/vllm-ascend/pull/7156)" +msgstr "" +"修复了运行带 LoRA 的 Qwen3-Reranker-0.6B 时的错误。[#7156](https://github.com/vllm-" +"project/vllm-ascend/pull/7156)" + +#: ../../source/user_guide/release_notes.md:50 +#: ../../source/user_guide/release_notes.md:142 +#: ../../source/user_guide/release_notes.md:343 +#: ../../source/user_guide/release_notes.md:1201 +msgid "Known Issue" +msgstr "已知问题" + +#: ../../source/user_guide/release_notes.md:52 +msgid "" +"GLM5 requires transformers==5.2.0, and this will be resolved by [vllm-" +"project/vllm#30566](https://github.com/vllm-project/vllm/pull/30566), " +"will not be included in v0.17.0." +msgstr "" +"GLM5 需要 transformers==5.2.0,此问题将通过 [vllm-project/vllm#30566](https://github.com/vllm-project/vllm/pull/30566) 解决,不会包含在 v0.17.0 版本中。" + +#: ../../source/user_guide/release_notes.md:53 +msgid "" +"There is a precision issue with Qwen3-Next due to the changed tp weight " +"split method. Will fix it in next release." +msgstr "" +"由于 TP 权重切分方法变更,Qwen3-Next 存在精度问题。将在下个版本中修复。" + +#: ../../source/user_guide/release_notes.md:54 +msgid "" +"In hybrid models, the minimum token count required for a prefix cache hit" +" is currently large. 
The exact number is related to tp size, e.g., with " +"tp 2, the block_size is adjusted to 2048, which means that any prefix " +"shorter than 2048 will never be cached." +msgstr "" +"在混合模型中,当前前缀缓存命中所需的最小令牌数较大。具体数值与 TP 大小相关,例如,TP 为 2 时,block_size 调整为 2048,这意味着任何短于 2048 的前缀都不会被缓存。" + +#: ../../source/user_guide/release_notes.md:55 +msgid "" +"GLM5 has an issue in the 2-node PD mixed deployment scenario where " +"inference may hang when concurrency exceeds 8 (fixed in PR " +"[#7235](https://github.com/vllm-project/vllm-ascend/pull/7235) " +"[#7290](https://github.com/vllm-project/vllm-ascend/pull/7290))." +msgstr "" +"GLM5 在两节点 PD 混合部署场景中存在一个问题,当并发数超过 8 时推理可能挂起(已在 PR [#7235](https://github.com/vllm-project/vllm-ascend/pull/7235) 和 [#7290](https://github.com/vllm-project/vllm-ascend/pull/7290) 中修复)。" + +#: ../../source/user_guide/release_notes.md:57 +msgid "v0.16.0rc1 - 2026.03.09" +msgstr "v0.16.0rc1 - 2026.03.09" + +#: ../../source/user_guide/release_notes.md:59 +msgid "" +"This is the first release candidate of v0.16.0 for vLLM Ascend. Please " +"follow the [official doc](https://docs.vllm.ai/projects/ascend/en/latest)" +" to get started." +msgstr "" +"这是 vLLM Ascend v0.16.0 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" + +#: ../../source/user_guide/release_notes.md:63 +msgid "" +"Qwen3-Omni quantization adaptation and optimization is now available. " +"[#6828](https://github.com/vllm-project/vllm-ascend/pull/6828)" +msgstr "" +"Qwen3-Omni 量化适配与优化现已可用。[#6828](https://github.com/vllm-project/vllm-" +"ascend/pull/6828)" + +#: ../../source/user_guide/release_notes.md:64 +msgid "" +"GLM5-W8A8 quantization is now supported by parameterizing hardcoded MLA " +"dimensions. [#6902](https://github.com/vllm-project/vllm-" +"ascend/pull/6902)" +msgstr "" +"通过参数化硬编码的 MLA 维度,现已支持 GLM5-W8A8 量化。[#6902](https://github.com/vllm-project/vllm-" +"ascend/pull/6902)" + +#: ../../source/user_guide/release_notes.md:68 +msgid "" +"[Experimental] Support FabricMem Mode for ADXL/HIXL interconnect. " +"[#6806](https://github.com/vllm-project/vllm-ascend/pull/6806)" +msgstr "" +"[实验性] 为 ADXL/HIXL 互连支持 FabricMem 模式。[#6806](https://github.com/vllm-project/vllm-" +"ascend/pull/6806)" + +#: ../../source/user_guide/release_notes.md:69 +msgid "" +"Qwen3-Next now supports FlashComm1. [#6830](https://github.com/vllm-" +"project/vllm-ascend/pull/6830)" +msgstr "" +"Qwen3-Next 现在支持 FlashComm1。[#6830](https://github.com/vllm-project/vllm-" +"ascend/pull/6830)" + +#: ../../source/user_guide/release_notes.md:70 +msgid "" +"NPUWorker Profiler now supports profile_prefix for better profiling " +"experience. [#6968](https://github.com/vllm-project/vllm-" +"ascend/pull/6968)" +msgstr "" +"NPUWorker Profiler 现在支持 profile_prefix,以提供更好的性能分析体验。[#6968](https://github.com/vllm-project/vllm-" +"ascend/pull/6968)" + +#: ../../source/user_guide/release_notes.md:71 +msgid "" +"EPLB profiling now displays expert hotness comparison and time required " +"for eplb adjustment. [#6877](https://github.com/vllm-project/vllm-" +"ascend/pull/6877) [#7001](https://github.com/vllm-project/vllm-" +"ascend/pull/7001)]" +msgstr "" +"EPLB 性能分析现在会显示专家热度对比以及 eplb 调整所需的时间。[#6877](https://github.com/vllm-project/vllm-" +"ascend/pull/6877) [#7001](https://github.com/vllm-project/vllm-ascend/pull/7001)]" + +#: ../../source/user_guide/release_notes.md:72 +msgid "" +"Xlite Qwen3 MoE now supports Data Parallel. 
[#6715](https://github.com" +"/vllm-project/vllm-ascend/pull/6715)" +msgstr "" +"Xlite Qwen3 MoE 现在支持数据并行。[#6715](https://github.com/vllm-project/vllm-" +"ascend/pull/6715)" + +#: ../../source/user_guide/release_notes.md:73 +msgid "" +"Mooncake Layerwise Connector now supports kv_pool. " +"[#7032](https://github.com/vllm-project/vllm-ascend/pull/7032)" +msgstr "" +"Mooncake Layerwise Connector 现在支持 kv_pool。[#7032](https://github.com/vllm-project/vllm-" +"ascend/pull/7032)" + +#: ../../source/user_guide/release_notes.md:74 +msgid "" +"Eagle3 now supports QuaRot quantization without embedding. " +"[#7038](https://github.com/vllm-project/vllm-ascend/pull/7038)" +msgstr "" +"Eagle3 现在支持不带嵌入层的 QuaRot 量化。[#7038](https://github.com/vllm-project/vllm-" +"ascend/pull/7038)" + +#: ../../source/user_guide/release_notes.md:76 +#: ../../source/user_guide/release_notes.md:175 +#: ../../source/user_guide/release_notes.md:273 +msgid "Hardware and Operator Support" +msgstr "硬件与算子支持" + +#: ../../source/user_guide/release_notes.md:78 +msgid "" +"310P now supports w8a8sc quantization method. [#7075](https://github.com" +"/vllm-project/vllm-ascend/pull/7075)" +msgstr "" +"310P 现在支持 w8a8sc 量化方法。[#7075](https://github.com/vllm-project/vllm-" +"ascend/pull/7075)" + +#: ../../source/user_guide/release_notes.md:79 +msgid "" +"Added AscendC casual_conv1d_fn operator for Qwen3-Next. " +"[#6661](https://github.com/vllm-project/vllm-ascend/pull/6661)" +msgstr "" +"为 Qwen3-Next 添加了 AscendC casual_conv1d_fn 算子。[#6661](https://github.com/vllm-project/vllm-" +"ascend/pull/6661)" + +#: ../../source/user_guide/release_notes.md:80 +msgid "" +"Added Ascend Ops recurrent_gated_delta_rule operator. " +"[#6725](https://github.com/vllm-project/vllm-ascend/pull/6725)" +msgstr "" +"添加了 Ascend Ops recurrent_gated_delta_rule 算子。[#6725](https://github.com/vllm-project/vllm-" +"ascend/pull/6725)" + +#: ../../source/user_guide/release_notes.md:81 +msgid "" +"Added GMM custom operator for MoE models. [#7010](https://github.com" +"/vllm-project/vllm-ascend/pull/7010)" +msgstr "" +"为 MoE 模型添加了 GMM 自定义算子。[#7010](https://github.com/vllm-project/vllm-" +"ascend/pull/7010)" + +#: ../../source/user_guide/release_notes.md:85 +msgid "" +"Faster convolution computation improves TTFT by 0.95% and throughput by " +"0.59% for Qwen3-VL models. [#7017](https://github.com/vllm-project/vllm-" +"ascend/pull/7017)" +msgstr "" +"更快的卷积计算将 Qwen3-VL 模型的 TTFT 提升了 0.95%,吞吐量提升了 0.59%。[#7017](https://github.com/vllm-project/vllm-" +"ascend/pull/7017)" + +#: ../../source/user_guide/release_notes.md:86 +msgid "" +"Optimize split_qkv_rmsnorm_rope operator. [#6827](https://github.com" +"/vllm-project/vllm-ascend/pull/6827)" +msgstr "" +"优化 split_qkv_rmsnorm_rope 算子。[#6827](https://github.com/vllm-project/vllm-" +"ascend/pull/6827)" + +#: ../../source/user_guide/release_notes.md:87 +msgid "" +"Implement global CPU slicing and improve IRQ binding for Ascend NPUs, " +"ensuring non-overlapping CPU partitions and better resource management. " +"[#6945](https://github.com/vllm-project/vllm-ascend/pull/6945)" +msgstr "" +"实现全局 CPU 切片并改进 Ascend NPU 的 IRQ 绑定,确保 CPU 分区不重叠并实现更好的资源管理。[#6945](https://github.com/vllm-project/vllm-" +"ascend/pull/6945)" + +#: ../../source/user_guide/release_notes.md:88 +msgid "" +"Optimize MTP execution by reordering state update operation. 
" +"[#6844](https://github.com/vllm-project/vllm-ascend/pull/6844)" +msgstr "" +"通过重排状态更新操作来优化 MTP 执行。[#6844](https://github.com/vllm-project/vllm-" +"ascend/pull/6844)" + +#: ../../source/user_guide/release_notes.md:89 +msgid "" +"Avoid CPU sync in mrope_positions copy by using full tensor copy. " +"[#7014](https://github.com/vllm-project/vllm-ascend/pull/7014)" +msgstr "" +"通过使用完整张量拷贝,避免 mrope_positions 拷贝中的 CPU 同步。[#7014](https://github.com/vllm-project/vllm-" +"ascend/pull/7014)" + +#: ../../source/user_guide/release_notes.md:90 +msgid "" +"Remove H2D synchronization for expert_map in MoE models. " +"[#7000](https://github.com/vllm-project/vllm-ascend/pull/7000)" +msgstr "" +"移除 MoE 模型中 expert_map 的 H2D 同步。[#7000](https://github.com/vllm-project/vllm-" +"ascend/pull/7000)" + +#: ../../source/user_guide/release_notes.md:92 +#: ../../source/user_guide/release_notes.md:199 +#: ../../source/user_guide/release_notes.md:304 +#: ../../source/user_guide/release_notes.md:386 +#: ../../source/user_guide/release_notes.md:441 +#: ../../source/user_guide/release_notes.md:500 +msgid "Dependencies" +msgstr "依赖项" + +#: ../../source/user_guide/release_notes.md:94 +msgid "" +"CANN is upgraded to 8.5.1, please remember to upgrade by hand if you're " +"not using the official image. [#6897](https://github.com/vllm-project" +"/vllm-ascend/pull/6897)" +msgstr "" +"CANN 已升级至 8.5.1,如果您未使用官方镜像,请记得手动升级。[#6897](https://github.com/vllm-project/vllm-" +"ascend/pull/6897)" + +#: ../../source/user_guide/release_notes.md:96 +#: ../../source/user_guide/release_notes.md:203 +#: ../../source/user_guide/release_notes.md:312 +#: ../../source/user_guide/release_notes.md:390 +#: ../../source/user_guide/release_notes.md:447 +#: ../../source/user_guide/release_notes.md:492 +msgid "Deprecation & Breaking Changes" +msgstr "弃用与破坏性变更" + +#: ../../source/user_guide/release_notes.md:98 +msgid "" +"`enable_flash_comm_v1` config option has been renamed back to " +"`enable_sp`. [#6883](https://github.com/vllm-project/vllm-" +"ascend/pull/6883)" +msgstr "" +"配置选项 `enable_flash_comm_v1` 已重命名回 `enable_sp`。[#6883](https://github.com/vllm-project/vllm-" +"ascend/pull/6883)" + +#: ../../source/user_guide/release_notes.md:99 +msgid "" +"The auto-detect quantization format from model files is reverted, in " +"v0.16.0rc1, we still need to add `--quantization ascend` to serve a model" +" quantized by modelslim. It will be added back in the next version after " +"the bug with the remote model id is fixed. [#6873](https://github.com" +"/vllm-project/vllm-ascend/pull/6873)" +msgstr "" +"从模型文件自动检测量化格式的功能已回退,在 v0.16.0rc1 中,我们仍需添加 `--quantization ascend` 来服务由 modelslim 量化的模型。在修复了远程模型 ID 相关的错误后,此功能将在下个版本中重新加入。[#6873](https://github.com/vllm-project/vllm-ascend/pull/6873)" + +#: ../../source/user_guide/release_notes.md:103 +msgid "" +"Added user/developer guide for CPU binding. [#7045](https://github.com" +"/vllm-project/vllm-ascend/pull/7045)" +msgstr "" +"添加了 CPU 绑定的用户/开发者指南。[#7045](https://github.com/vllm-project/vllm-" +"ascend/pull/7045)" + +#: ../../source/user_guide/release_notes.md:104 +msgid "" +"Added metrics usage documentation and example. [#6962](https://github.com" +"/vllm-project/vllm-ascend/pull/6962)" +msgstr "" +"添加了指标使用文档和示例。[#6962](https://github.com/vllm-project/vllm-" +"ascend/pull/6962)" + +#: ../../source/user_guide/release_notes.md:105 +msgid "" +"Added llms.txt for LLM discovery. 
[#6886](https://github.com/vllm-project" +"/vllm-ascend/pull/6886)" +msgstr "" +"添加了用于 LLM 发现的 llms.txt 文件。[#6886](https://github.com/vllm-project/vllm-" +"ascend/pull/6886)" + +#: ../../source/user_guide/release_notes.md:106 +msgid "" +"Added GLM4.x multi-node deploy tutorial. [#6872](https://github.com/vllm-" +"project/vllm-ascend/pull/6872)" +msgstr "新增 GLM4.x 多节点部署教程。 [#6872](https://github.com/vllm-project/vllm-ascend/pull/6872)" + +#: ../../source/user_guide/release_notes.md:107 +msgid "" +"Added explanation of 310p special param: max-model-len. " +"[#7065](https://github.com/vllm-project/vllm-ascend/pull/7065)" +msgstr "新增 310p 特殊参数 max-model-len 的说明。 [#7065](https://github.com/vllm-project/vllm-ascend/pull/7065)" + +#: ../../source/user_guide/release_notes.md:111 +msgid "" +"Fix openEuler Dockerfile error. [#6871](https://github.com/vllm-project" +"/vllm-ascend/pull/6871)" +msgstr "修复 openEuler Dockerfile 错误。 [#6871](https://github.com/vllm-project/vllm-ascend/pull/6871)" + +#: ../../source/user_guide/release_notes.md:112 +msgid "Many bug fixes including:" +msgstr "多项错误修复,包括:" + +#: ../../source/user_guide/release_notes.md:113 +msgid "" +"Fix Eagle speculative decoding with Context Parallel enabled. " +"[#6981](https://github.com/vllm-project/vllm-ascend/pull/6981) " +"[#7079](https://github.com/vllm-project/vllm-ascend/pull/7079)" +msgstr "修复启用上下文并行时 Eagle 推测式解码的问题。 [#6981](https://github.com/vllm-project/vllm-ascend/pull/6981) [#7079](https://github.com/vllm-project/vllm-ascend/pull/7079)" + +#: ../../source/user_guide/release_notes.md:114 +msgid "" +"Fix LoRA accuracy issue introduced by upstream vLLM changes. " +"[#6958](https://github.com/vllm-project/vllm-ascend/pull/6958)" +msgstr "修复上游 vLLM 变更引入的 LoRA 精度问题。 [#6958](https://github.com/vllm-project/vllm-ascend/pull/6958)" + +#: ../../source/user_guide/release_notes.md:115 +msgid "" +"Fix streaming content-type in load balance proxy server. " +"[#6985](https://github.com/vllm-project/vllm-ascend/pull/6985)" +msgstr "修复负载均衡代理服务器中的流式传输 content-type。 [#6985](https://github.com/vllm-project/vllm-ascend/pull/6985)" + +#: ../../source/user_guide/release_notes.md:116 +msgid "" +"Fix metadata execute error: integer modulo by zero. " +"[#6521](https://github.com/vllm-project/vllm-ascend/pull/6521)" +msgstr "修复元数据执行错误:整数除以零取模。 [#6521](https://github.com/vllm-project/vllm-ascend/pull/6521)" + +#: ../../source/user_guide/release_notes.md:117 +msgid "" +"Fix triton rope_siso implementation bug. [#7082](https://github.com/vllm-" +"project/vllm-ascend/pull/7082)" +msgstr "修复 triton rope_siso 实现中的错误。 [#7082](https://github.com/vllm-project/vllm-ascend/pull/7082)" + +#: ../../source/user_guide/release_notes.md:118 +msgid "" +"Fix incorrect layer count for MTP models in update_aclgraph_sizes. " +"[#7064](https://github.com/vllm-project/vllm-ascend/pull/7064)" +msgstr "修复 update_aclgraph_sizes 中 MTP 模型的层数错误。 [#7064](https://github.com/vllm-project/vllm-ascend/pull/7064)" + +#: ../../source/user_guide/release_notes.md:119 +msgid "" +"Fix compilation errors for CANN versions subsequent to b020. " +"[#7059](https://github.com/vllm-project/vllm-ascend/pull/7059)" +msgstr "修复 b020 之后 CANN 版本的编译错误。 [#7059](https://github.com/vllm-project/vllm-ascend/pull/7059)" + +#: ../../source/user_guide/release_notes.md:120 +msgid "" +"Fix quant config support in GLM4.6V. 
[#7062](https://github.com/vllm-" +"project/vllm-ascend/pull/7062)" +msgstr "修复 GLM4.6V 中的量化配置支持。 [#7062](https://github.com/vllm-project/vllm-ascend/pull/7062)" + +#: ../../source/user_guide/release_notes.md:121 +msgid "" +"Fix parameter ordering bug in _merge_multimodal_embeddings. " +"[#7068](https://github.com/vllm-project/vllm-ascend/pull/7068)" +msgstr "修复 _merge_multimodal_embeddings 中的参数顺序错误。 [#7068](https://github.com/vllm-project/vllm-ascend/pull/7068)" + +#: ../../source/user_guide/release_notes.md:122 +msgid "" +"Fix fused mc2 bug in EPLB. [#6794](https://github.com/vllm-project/vllm-" +"ascend/pull/6794)" +msgstr "修复 EPLB 中的 fused mc2 错误。 [#6794](https://github.com/vllm-project/vllm-ascend/pull/6794)" + +#: ../../source/user_guide/release_notes.md:123 +msgid "" +"Fix kernel block size for computing slot mapping. " +"[#7019](https://github.com/vllm-project/vllm-ascend/pull/7019)" +msgstr "修复计算槽位映射的内核块大小。 [#7019](https://github.com/vllm-project/vllm-ascend/pull/7019)" + +#: ../../source/user_guide/release_notes.md:124 +msgid "" +"Fix layerwise stacking MTP error in P/D disaggregation. " +"[#7036](https://github.com/vllm-project/vllm-ascend/pull/7036)" +msgstr "修复 P/D 解耦中逐层堆叠 MTP 的错误。 [#7036](https://github.com/vllm-project/vllm-ascend/pull/7036)" + +#: ../../source/user_guide/release_notes.md:125 +msgid "" +"Fix RoPE dimension for npu_rotary_embedding. [#6880](https://github.com" +"/vllm-project/vllm-ascend/pull/6880)" +msgstr "修复 npu_rotary_embedding 的 RoPE 维度。 [#6880](https://github.com/vllm-project/vllm-ascend/pull/6880)" + +#: ../../source/user_guide/release_notes.md:126 +msgid "" +"Fix Qwen-Omni quantization bugs. [#7042](https://github.com/vllm-project" +"/vllm-ascend/pull/7042) [#7007](https://github.com/vllm-project/vllm-" +"ascend/pull/7007)" +msgstr "修复 Qwen-Omni 量化错误。 [#7042](https://github.com/vllm-project/vllm-ascend/pull/7042) [#7007](https://github.com/vllm-project/vllm-ascend/pull/7007)" + +#: ../../source/user_guide/release_notes.md:127 +msgid "" +"Fix GDN layer accuracy in graph mode. [#6822](https://github.com/vllm-" +"project/vllm-ascend/pull/6822)" +msgstr "修复图模式下 GDN 层的精度问题。 [#6822](https://github.com/vllm-project/vllm-ascend/pull/6822)" + +#: ../../source/user_guide/release_notes.md:128 +msgid "" +"Fix precision bugs for PCP/DCP in PD disaggregate. " +"[#6876](https://github.com/vllm-project/vllm-ascend/pull/6876)" +msgstr "修复 PD 解耦中 PCP/DCP 的精度错误。 [#6876](https://github.com/vllm-project/vllm-ascend/pull/6876)" + +#: ../../source/user_guide/release_notes.md:129 +msgid "" +"Fix MTP in PD disaggregation with fullgraph support for all D-Nodes. " +"[#6948](https://github.com/vllm-project/vllm-ascend/pull/6948)" +msgstr "修复 PD 解耦中的 MTP,为所有 D-Node 提供全图支持。 [#6948](https://github.com/vllm-project/vllm-ascend/pull/6948)" + +#: ../../source/user_guide/release_notes.md:130 +msgid "" +"Fix GQA model error when enabling both DP and DCP. " +"[#7012](https://github.com/vllm-project/vllm-ascend/pull/7012)" +msgstr "修复同时启用 DP 和 DCP 时 GQA 模型的错误。 [#7012](https://github.com/vllm-project/vllm-ascend/pull/7012)" + +#: ../../source/user_guide/release_notes.md:131 +msgid "" +"Fix MTP prefill misclassified as decode edge case. " +"[#6835](https://github.com/vllm-project/vllm-ascend/pull/6835)" +msgstr "修复 MTP 预填充被错误分类为解码的边缘情况。 [#6835](https://github.com/vllm-project/vllm-ascend/pull/6835)" + +#: ../../source/user_guide/release_notes.md:132 +msgid "" +"Fix Eagle3 acceptance rate for QuaRot quantized models. 
" +"[#6914](https://github.com/vllm-project/vllm-ascend/pull/6914)" +msgstr "修复 QuaRot 量化模型的 Eagle3 接受率问题。 [#6914](https://github.com/vllm-project/vllm-ascend/pull/6914)" + +#: ../../source/user_guide/release_notes.md:133 +msgid "" +"Fix RoPE shape mismatch for MTP models with FlashComm V1 enabled. " +"[#6939](https://github.com/vllm-project/vllm-ascend/pull/6939)" +msgstr "修复启用 FlashComm V1 时 MTP 模型的 RoPE 形状不匹配问题。 [#6939](https://github.com/vllm-project/vllm-ascend/pull/6939)" + +#: ../../source/user_guide/release_notes.md:134 +msgid "" +"Fix Qwen2.5VL accuracy issue. [#6975](https://github.com/vllm-project" +"/vllm-ascend/pull/6975)" +msgstr "修复 Qwen2.5VL 精度问题。 [#6975](https://github.com/vllm-project/vllm-ascend/pull/6975)" + +#: ../../source/user_guide/release_notes.md:135 +msgid "" +"Fix MoE forward error with static kernel enabled. " +"[#6964](https://github.com/vllm-project/vllm-ascend/pull/6964)" +msgstr "修复启用静态内核时的 MoE 前向传播错误。 [#6964](https://github.com/vllm-project/vllm-ascend/pull/6964)" + +#: ../../source/user_guide/release_notes.md:136 +msgid "" +"Fix muls_add fusion for GLM5 models. [#6928](https://github.com/vllm-" +"project/vllm-ascend/pull/6928)" +msgstr "修复 GLM5 模型的 muls_add 融合问题。 [#6928](https://github.com/vllm-project/vllm-ascend/pull/6928)" + +#: ../../source/user_guide/release_notes.md:137 +msgid "" +"Fix GDN layer detection for multimodal models. [#6941](https://github.com" +"/vllm-project/vllm-ascend/pull/6941)" +msgstr "修复多模态模型的 GDN 层检测。 [#6941](https://github.com/vllm-project/vllm-ascend/pull/6941)" + +#: ../../source/user_guide/release_notes.md:138 +msgid "" +"Fix 300I unquant model weight nd2nz error. [#6851](https://github.com" +"/vllm-project/vllm-ascend/pull/6851)" +msgstr "修复 300I 非量化模型权重的 nd2nz 错误。 [#6851](https://github.com/vllm-project/vllm-ascend/pull/6851)" + +#: ../../source/user_guide/release_notes.md:139 +msgid "" +"Fix CPU binding logic. [#6889](https://github.com/vllm-project/vllm-" +"ascend/pull/6889)" +msgstr "修复 CPU 绑定逻辑。 [#6889](https://github.com/vllm-project/vllm-ascend/pull/6889)" + +#: ../../source/user_guide/release_notes.md:140 +msgid "" +"Fix Eagle fullgraph shape capture. [#6846](https://github.com/vllm-" +"project/vllm-ascend/pull/6846)" +msgstr "修复 Eagle 全图形状捕获问题。 [#6846](https://github.com/vllm-project/vllm-ascend/pull/6846)" + +#: ../../source/user_guide/release_notes.md:144 +msgid "" +"Currently, for DeepSeek v3.2, PCP & DCP do not yet work with FlashComm1 " +"feature, which may cause serve errors or other unknown errors." +msgstr "目前,对于 DeepSeek v3.2,PCP 和 DCP 尚不能与 FlashComm1 功能协同工作,这可能导致服务错误或其他未知错误。" + +#: ../../source/user_guide/release_notes.md:145 +msgid "" +"In 4-node A3 PD disaggregation deployment with DeepSeek V3.2, the P-Node " +"may hang when benchmarking in high concurrency scenario, e.g., 2K/2K " +"tokens with 512 concurrent requests." +msgstr "在使用 DeepSeek V3.2 的 4 节点 A3 PD 解耦部署中,P-Node 在高并发场景(例如,2K/2K tokens 和 512 个并发请求)下进行基准测试时可能出现挂起。" + +#: ../../source/user_guide/release_notes.md:146 +#, python-brace-format +msgid "" +"MTP with large EP configurations may cause graph capture buffer overflow." +" This is a bug need to fix in vLLM, now there is a workaround to avoid " +"it: explicitly set `--compilation-config " +"'{\"max_cudagraph_capture_size\": N}'` where `N = max_concurrency × (1 + " +"num_speculative_tokens)`." 
+msgstr "具有大规模 EP 配置的 MTP 可能导致图捕获缓冲区溢出。这是 vLLM 中需要修复的一个错误,目前有一个临时解决方案可以避免此问题:显式设置 `--compilation-config '{\"max_cudagraph_capture_size\": N}'`,其中 `N = max_concurrency × (1 + num_speculative_tokens)`。" + +#: ../../source/user_guide/release_notes.md:148 +msgid "v0.15.0rc1 - 2026.02.27" +msgstr "v0.15.0rc1 - 2026.02.27" + +#: ../../source/user_guide/release_notes.md:150 +msgid "" +"This is the first release candidate of v0.15.0 for vLLM Ascend. Please " +"follow the [official doc](https://docs.vllm.ai/projects/ascend/en/latest)" +" to get started." +msgstr "这是 vLLM Ascend v0.15.0 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" + +#: ../../source/user_guide/release_notes.md:154 +msgid "" +"**NPU Graph EX (npugraph_ex) Enabled by Default**: The npugraph_ex " +"feature is now enabled by default, providing better graph optimization " +"with integrated inductor pass and MatmulAllReduceAddRMSNorm fusion. " +"[#6354](https://github.com/vllm-project/vllm-ascend/pull/6354) " +"[#6664](https://github.com/vllm-project/vllm-ascend/pull/6664) " +"[#6006](https://github.com/vllm-project/vllm-ascend/pull/6006)" +msgstr "**NPU Graph EX (npugraph_ex) 默认启用**:npugraph_ex 功能现已默认启用,通过集成的 inductor pass 和 MatmulAllReduceAddRMSNorm 融合提供更好的图优化。 [#6354](https://github.com/vllm-project/vllm-ascend/pull/6354) [#6664](https://github.com/vllm-project/vllm-ascend/pull/6664) [#6006](https://github.com/vllm-project/vllm-ascend/pull/6006)" + +#: ../../source/user_guide/release_notes.md:155 +msgid "" +"**310P MoE and W8A8 Support**[Experimental]: 310P now supports MoE " +"models, W8A8 quantization, and weightNZ feature, significantly expanding " +"hardware capabilities. [#6530](https://github.com/vllm-project/vllm-" +"ascend/pull/6530) [#6641](https://github.com/vllm-project/vllm-" +"ascend/pull/6641) [#6454](https://github.com/vllm-project/vllm-" +"ascend/pull/6454) [#6705](https://github.com/vllm-project/vllm-" +"ascend/pull/6705)" +msgstr "**310P MoE 和 W8A8 支持**[实验性]:310P 现在支持 MoE 模型、W8A8 量化和 weightNZ 功能,显著扩展了硬件能力。 [#6530](https://github.com/vllm-project/vllm-ascend/pull/6530) [#6641](https://github.com/vllm-project/vllm-ascend/pull/6641) [#6454](https://github.com/vllm-project/vllm-ascend/pull/6454) [#6705](https://github.com/vllm-project/vllm-ascend/pull/6705)" + +#: ../../source/user_guide/release_notes.md:156 +msgid "" +"**Qwen3-VL-MoE EAGLE Support**: Added EAGLE speculative decoding support " +"for Qwen3-VL-MoE model. [#6327](https://github.com/vllm-project/vllm-" +"ascend/pull/6327)" +msgstr "**Qwen3-VL-MoE EAGLE 支持**:为 Qwen3-VL-MoE 模型新增 EAGLE 推测式解码支持。 [#6327](https://github.com/vllm-project/vllm-ascend/pull/6327)" + +#: ../../source/user_guide/release_notes.md:157 +msgid "" +"**Kimi-K2.5 Model Support**: Added support for Kimi-K2.5 models. **Please" +" note** that vLLM 0.15.0 has a known issue with Kimi-K2.5. To fix this, " +"please apply the changes from the upstream `vllm-project/vllm` " +"repository, specifically from pull requests [#33320](https://github.com" +"/vllm-project/vllm/pull/33320) and [#34501](https://github.com/vllm-" +"project/vllm/pull/34501). 
[#6755](https://github.com/vllm-project/vllm-" +"ascend/pull/6755)" +msgstr "" +"**Kimi-K2.5 模型支持**:新增对 Kimi-K2.5 模型的支持。**请注意**,vLLM 0.15.0 存在一个与 Kimi-K2.5 相关的已知问题。要修复此问题,请应用上游 `vllm-project/vllm` 仓库的更改,具体来自拉取请求 [#33320](https://github.com/vllm-project/vllm/pull/33320) 和 [#34501](https://github.com/vllm-project/vllm/pull/34501)。[#6755](https://github.com/vllm-project/vllm-ascend/pull/6755)" + +#: ../../source/user_guide/release_notes.md:161 +msgid "" +"**Auto-detect Quantization Format**: Quantization format can now be auto-" +"detected from model files. [#6645](https://github.com/vllm-project/vllm-" +"ascend/pull/6645)" +msgstr "" +"**自动检测量化格式**:现在可以从模型文件中自动检测量化格式。[#6645](https://github.com/vllm-project/vllm-ascend/pull/6645)" + +#: ../../source/user_guide/release_notes.md:162 +msgid "" +"**GPT-OSS Attention Support**: Added GPT-OSS attention implementation. " +"[#5901](https://github.com/vllm-project/vllm-ascend/pull/5901)" +msgstr "" +"**GPT-OSS Attention 支持**:新增 GPT-OSS attention 实现。[#5901](https://github.com/vllm-project/vllm-ascend/pull/5901)" + +#: ../../source/user_guide/release_notes.md:163 +msgid "" +"**DCP Support for SFA**: Added Decode Context Parallel (DCP) support for " +"SFA architecture. [#6563](https://github.com/vllm-project/vllm-" +"ascend/pull/6563)" +msgstr "" +"**SFA 架构的 DCP 支持**:为 SFA 架构新增解码上下文并行(DCP)支持。[#6563](https://github.com/vllm-project/vllm-ascend/pull/6563)" + +#: ../../source/user_guide/release_notes.md:164 +msgid "" +"**Mooncake Layerwise PCP Support**: Mooncake layerwise connector now " +"supports PCP function. [#6627](https://github.com/vllm-project/vllm-" +"ascend/pull/6627)" +msgstr "" +"**Mooncake 分层连接器 PCP 支持**:Mooncake 分层连接器现在支持 PCP 功能。[#6627](https://github.com/vllm-project/vllm-ascend/pull/6627)" + +#: ../../source/user_guide/release_notes.md:165 +msgid "" +"**Mooncake Connector Remote PTP Size**: Mooncake connector can now get " +"remote PTP size. [#5822](https://github.com/vllm-project/vllm-" +"ascend/pull/5822)" +msgstr "" +"**Mooncake 连接器远程 PTP 大小**:Mooncake 连接器现在可以获取远程 PTP 大小。[#5822](https://github.com/vllm-project/vllm-ascend/pull/5822)" + +#: ../../source/user_guide/release_notes.md:166 +msgid "" +"**KV Pool Sparse Attention**: KV pool now supports sparse attention. " +"[#6339](https://github.com/vllm-project/vllm-ascend/pull/6339)" +msgstr "" +"**KV 池稀疏注意力**:KV 池现在支持稀疏注意力。[#6339](https://github.com/vllm-project/vllm-ascend/pull/6339)" + +#: ../../source/user_guide/release_notes.md:167 +msgid "" +"**Batch Invariant with AscendC**: Implemented batch invariant feature " +"with AscendC. [#6590](https://github.com/vllm-project/vllm-" +"ascend/pull/6590)" +msgstr "" +"**基于 AscendC 的 Batch Invariant**:使用 AscendC 实现了 batch invariant 特性。[#6590](https://github.com/vllm-project/vllm-ascend/pull/6590)" + +#: ../../source/user_guide/release_notes.md:168 +msgid "" +"**Routing Replay**: Added routing replay feature. " +"[#6696](https://github.com/vllm-project/vllm-ascend/pull/6696)" +msgstr "" +"**路由重放**:新增路由重放功能。[#6696](https://github.com/vllm-project/vllm-ascend/pull/6696)" + +#: ../../source/user_guide/release_notes.md:169 +msgid "" +"**Compressed Tensors MoE W4A8 Dynamic Weight**: Added support for " +"compressed tensors moe w4a8 dynamic weight quantization. 
" +"[#5889](https://github.com/vllm-project/vllm-ascend/pull/5889)" +msgstr "" +"**压缩张量 MoE W4A8 动态权重**:新增对压缩张量 MoE W4A8 动态权重量化的支持。[#5889](https://github.com/vllm-project/vllm-ascend/pull/5889)" + +#: ../../source/user_guide/release_notes.md:170 +msgid "" +"**GLM4.7-Flash W8A8 Quantization**: Added W8A8 quantization support for " +"GLM4.7-Flash. [#6492](https://github.com/vllm-project/vllm-" +"ascend/pull/6492)" +msgstr "" +"**GLM4.7-Flash W8A8 量化**:为 GLM4.7-Flash 新增 W8A8 量化支持。[#6492](https://github.com/vllm-project/vllm-ascend/pull/6492)" + +#: ../../source/user_guide/release_notes.md:171 +msgid "" +"**DispatchGmmCombineDecode Enhancement**: DispatchGmmCombineDecode now " +"supports bf16/float16 gmm1/gmm2 weight and ND format weight. " +"[#6393](https://github.com/vllm-project/vllm-ascend/pull/6393)" +msgstr "" +"**DispatchGmmCombineDecode 增强**:DispatchGmmCombineDecode 现在支持 bf16/float16 的 gmm1/gmm2 权重以及 ND 格式权重。[#6393](https://github.com/vllm-project/vllm-ascend/pull/6393)" + +#: ../../source/user_guide/release_notes.md:172 +msgid "" +"**RMSNorm Dynamic Quant Fusion**: Added rmsnorm dynamic quant fusion " +"pass. [#6274](https://github.com/vllm-project/vllm-ascend/pull/6274)" +msgstr "" +"**RMSNorm 动态量化融合**:新增 rmsnorm 动态量化融合 pass。[#6274](https://github.com/vllm-project/vllm-ascend/pull/6274)" + +#: ../../source/user_guide/release_notes.md:173 +msgid "" +"**Worker Health Check Interface**: Added `check_health` interface for " +"worker. [#6681](https://github.com/vllm-project/vllm-ascend/pull/6681)" +msgstr "" +"**Worker 健康检查接口**:为 worker 新增 `check_health` 接口。[#6681](https://github.com/vllm-project/vllm-ascend/pull/6681)" + +#: ../../source/user_guide/release_notes.md:177 +msgid "**310P Support Expansion**: Multiple improvements for 310P hardware:" +msgstr "**310P 支持扩展**:针对 310P 硬件的多项改进:" + +#: ../../source/user_guide/release_notes.md:178 +msgid "" +"Fixed attention accuracy issue on 310P. [#6803](https://github.com/vllm-" +"project/vllm-ascend/pull/6803)" +msgstr "" +"修复了 310P 上的注意力精度问题。[#6803](https://github.com/vllm-project/vllm-ascend/pull/6803)" + +#: ../../source/user_guide/release_notes.md:179 +msgid "" +"Added weightNZ feature for 310P with quant or unquant support. " +"[#6705](https://github.com/vllm-project/vllm-ascend/pull/6705)" +msgstr "" +"为 310P 新增 weightNZ 特性,支持量化或非量化。[#6705](https://github.com/vllm-project/vllm-ascend/pull/6705)" + +#: ../../source/user_guide/release_notes.md:180 +msgid "" +"Added addrmsnorm support for 300I DUO. [#6704](https://github.com/vllm-" +"project/vllm-ascend/pull/6704)" +msgstr "" +"为 300I DUO 新增 addrmsnorm 支持。[#6704](https://github.com/vllm-project/vllm-ascend/pull/6704)" + +#: ../../source/user_guide/release_notes.md:181 +msgid "" +"310P now supports PrefillCacheHit state. [#6756](https://github.com/vllm-" +"project/vllm-ascend/pull/6756)" +msgstr "" +"310P 现在支持 PrefillCacheHit 状态。[#6756](https://github.com/vllm-project/vllm-ascend/pull/6756)" + +#: ../../source/user_guide/release_notes.md:182 +msgid "" +"**ARM-only CPU Binding**: Enabled ARM-only CPU binding with NUMA-balanced" +" A3 policy. [#6686](https://github.com/vllm-project/vllm-" +"ascend/pull/6686)" +msgstr "" +"**仅 ARM CPU 绑定**:启用了仅 ARM CPU 绑定,采用 NUMA 均衡的 A3 策略。[#6686](https://github.com/vllm-project/vllm-ascend/pull/6686)" + +#: ../../source/user_guide/release_notes.md:183 +msgid "" +"**Triton Rope Enhancement**: Triton rope now supports index_selecting " +"from cos_sin_cache. 
[#5450](https://github.com/vllm-project/vllm-" +"ascend/pull/5450)" +msgstr "" +"**Triton Rope 增强**:Triton rope 现在支持从 cos_sin_cache 进行 index_selecting。[#5450](https://github.com/vllm-project/vllm-ascend/pull/5450)" + +#: ../../source/user_guide/release_notes.md:184 +msgid "" +"**AscendC Fused Op**: Added AscendC fused op transpose_kv_cache_by_block " +"to speed up GQA transfer. [#6366](https://github.com/vllm-project/vllm-" +"ascend/pull/6366)" +msgstr "" +"**AscendC 融合算子**:新增 AscendC 融合算子 transpose_kv_cache_by_block,以加速 GQA 传输。[#6366](https://github.com/vllm-project/vllm-ascend/pull/6366)" + +#: ../../source/user_guide/release_notes.md:185 +msgid "" +"**Rotary_dim Parameter**: Added support for rotary_dim parameter when " +"using partial rope in rotary_embedding. [#6581](https://github.com/vllm-" +"project/vllm-ascend/pull/6581)" +msgstr "" +"**Rotary_dim 参数**:在 rotary_embedding 中使用 partial rope 时,新增对 rotary_dim 参数的支持。[#6581](https://github.com/vllm-project/vllm-ascend/pull/6581)" + +#: ../../source/user_guide/release_notes.md:189 +msgid "" +"**Multimodal seq_lens CPU Cache**: Use `seq_lens` CPU cache to avoid " +"frequent D2H copy for better multimodal performance. " +"[#6448](https://github.com/vllm-project/vllm-ascend/pull/6448)" +msgstr "" +"**多模态 seq_lens CPU 缓存**:使用 `seq_lens` CPU 缓存以避免频繁的 D2H 拷贝,从而提升多模态性能。[#6448](https://github.com/vllm-project/vllm-ascend/pull/6448)" + +#: ../../source/user_guide/release_notes.md:190 +msgid "" +"**DispatchFFNCombine Optimization**: Optimized DispatchFFNCombine kernel " +"performance and resolved vector error caused by unaligned UB access. " +"[#6468](https://github.com/vllm-project/vllm-ascend/pull/6468) " +"[#6707](https://github.com/vllm-project/vllm-ascend/pull/6707)" +msgstr "" +"**DispatchFFNCombine 优化**:优化了 DispatchFFNCombine 内核性能,并解决了因未对齐 UB 访问导致的向量错误。[#6468](https://github.com/vllm-project/vllm-ascend/pull/6468) [#6707](https://github.com/vllm-project/vllm-ascend/pull/6707)" + +#: ../../source/user_guide/release_notes.md:191 +msgid "" +"**DeepSeek V3.2 KVCache Optimization**: Optimized KV cache usage for " +"DeepSeek V3.2. [#6610](https://github.com/vllm-project/vllm-" +"ascend/pull/6610)" +msgstr "" +"**DeepSeek V3.2 KVCache 优化**:优化了 DeepSeek V3.2 的 KV 缓存使用。[#6610](https://github.com/vllm-project/vllm-ascend/pull/6610)" + +#: ../../source/user_guide/release_notes.md:192 +msgid "" +"**MLA/SFA Weight Prefetch**: Refactored MLA/SFA weight prefetch to be " +"consistent with MoE weight prefetch. [#6629](https://github.com/vllm-" +"project/vllm-ascend/pull/6629)" +msgstr "" +"**MLA/SFA 权重预取**:重构了 MLA/SFA 权重预取,使其与 MoE 权重预取保持一致。[#6629](https://github.com/vllm-project/vllm-ascend/pull/6629)" + +#: ../../source/user_guide/release_notes.md:193 +msgid "" +"**MLP Weight Prefetch**: Refactored MLP weight prefetch to be consistent " +"with MoE model's prefetching. [#6442](https://github.com/vllm-project" +"/vllm-ascend/pull/6442)" +msgstr "" +"**MLP 权重预取**:重构了 MLP 权重预取,使其与 MoE 模型的预取保持一致。[#6442](https://github.com/vllm-project/vllm-ascend/pull/6442)" + +#: ../../source/user_guide/release_notes.md:194 +msgid "" +"**Adaptive Block Size Selection**: Added adaptive block size selection in" +" linear_persistent kernel. [#6537](https://github.com/vllm-project/vllm-" +"ascend/pull/6537)" +msgstr "" +"**自适应块大小选择**:在 linear_persistent 内核中新增自适应块大小选择功能。[#6537](https://github.com/vllm-project/vllm-ascend/pull/6537)" + +#: ../../source/user_guide/release_notes.md:195 +msgid "" +"**EPLB Memory Optimization**: Reduced memory used for heat aggregation in" +" EPLB. 
[#6729](https://github.com/vllm-project/vllm-ascend/pull/6729)" +msgstr "" +"**EPLB 内存优化**:减少了 EPLB 中用于热度聚合的内存使用。[#6729](https://github.com/vllm-project/vllm-ascend/pull/6729)" + +#: ../../source/user_guide/release_notes.md:196 +msgid "" +"**Memory Migration and Interrupt Core Binding**: Improved binding logic " +"with memory migration and interrupt core binding functions. " +"[#6785](https://github.com/vllm-project/vllm-ascend/pull/6785)" +msgstr "" +"**内存迁移与中断核心绑定**:改进了绑定逻辑,增加了内存迁移和中断核心绑定功能。[#6785](https://github.com/vllm-project/vllm-ascend/pull/6785)" + +#: ../../source/user_guide/release_notes.md:197 +msgid "" +"**Triton Stability**: Improved Triton stability on Ascend for large " +"grids. [#6301](https://github.com/vllm-project/vllm-ascend/pull/6301)" +msgstr "" +"**Triton 稳定性**:提升了 Triton 在 Ascend 上处理大规模网格时的稳定性。[#6301](https://github.com/vllm-project/vllm-ascend/pull/6301)" + +#: ../../source/user_guide/release_notes.md:201 +msgid "" +"**Mooncake**: Upgraded to v0.3.8.post1. [#6428](https://github.com/vllm-" +"project/vllm-ascend/pull/6428)" +msgstr "" +"**Mooncake**:升级至 v0.3.8.post1。[#6428](https://github.com/vllm-project/vllm-ascend/pull/6428)" + +#: ../../source/user_guide/release_notes.md:205 +msgid "" +"**ProfileExecuteDuration**: Cleaned up and deprecated " +"ProfileExecuteDuration feature. [#6461](https://github.com/vllm-project" +"/vllm-ascend/pull/6461)" +msgstr "" +"**ProfileExecuteDuration**:清理并弃用了 ProfileExecuteDuration 功能。[#6461](https://github.com/vllm-project/vllm-ascend/pull/6461)" + +#: ../../source/user_guide/release_notes.md:206 +msgid "" +"**Custom rotary_embedding Operator**: Removed custom rotary_embedding " +"operator. [#6523](https://github.com/vllm-project/vllm-ascend/pull/6523)" +msgstr "" +"**自定义 rotary_embedding 算子**:移除了自定义 rotary_embedding 算子。[#6523](https://github.com/vllm-project/vllm-ascend/pull/6523)" + +#: ../../source/user_guide/release_notes.md:207 +msgid "" +"**USE_OPTIMIZED_MODEL**: Cleaned up unused env `USE_OPTIMIZED_MODEL`. " +"[#6618](https://github.com/vllm-project/vllm-ascend/pull/6618)" +msgstr "" +"**USE_OPTIMIZED_MODEL**:清理了未使用的环境变量 `USE_OPTIMIZED_MODEL`。[#6618](https://github.com/vllm-project/vllm-ascend/pull/6618)" + +#: ../../source/user_guide/release_notes.md:211 +msgid "" +"Added AI-assisted model-adaptation workflow documentation for vllm-" +"ascend. [#6731](https://github.com/vllm-project/vllm-ascend/pull/6731)" +msgstr "" +"新增了 vllm-ascend 的 AI 辅助模型适配工作流文档。[#6731](https://github.com/vllm-project/vllm-ascend/pull/6731)" + +#: ../../source/user_guide/release_notes.md:212 +msgid "" +"Added vLLM Ascend development guidelines (AGETNS.md). " +"[#6797](https://github.com/vllm-project/vllm-ascend/pull/6797)" +msgstr "" +"新增了 vLLM Ascend 开发指南 (AGETNS.md)。[#6797](https://github.com/vllm-project/vllm-ascend/pull/6797)" + +#: ../../source/user_guide/release_notes.md:213 +msgid "" +"Added GLM5 tutorial documentation. [#6709](https://github.com/vllm-" +"project/vllm-ascend/pull/6709) [#6717](https://github.com/vllm-project" +"/vllm-ascend/pull/6717)" +msgstr "" +"新增 GLM5 教程文档。 [#6709](https://github.com/vllm-project/vllm-" +"ascend/pull/6709) [#6717](https://github.com/vllm-project/vllm-" +"ascend/pull/6717)" + +#: ../../source/user_guide/release_notes.md:214 +msgid "" +"Added Memcache Usage Guide. 
[#6476](https://github.com/vllm-project/vllm-" +"ascend/pull/6476)" +msgstr "" +"新增 Memcache 使用指南。 [#6476](https://github.com/vllm-project/vllm-" +"ascend/pull/6476)" + +#: ../../source/user_guide/release_notes.md:215 +msgid "" +"Added request forwarding documentation. [#6780](https://github.com/vllm-" +"project/vllm-ascend/pull/6780)" +msgstr "" +"新增请求转发文档。 [#6780](https://github.com/vllm-project/vllm-" +"ascend/pull/6780)" + +#: ../../source/user_guide/release_notes.md:216 +msgid "" +"Added Benchmark Tutorial for Suffix Speculative Decoding. " +"[#6323](https://github.com/vllm-project/vllm-ascend/pull/6323)" +msgstr "" +"新增后缀推测解码的基准测试教程。 [#6323](https://github.com/vllm-project/vllm-" +"ascend/pull/6323)" + +#: ../../source/user_guide/release_notes.md:217 +msgid "" +"Restructured tutorial documentation. [#6501](https://github.com/vllm-" +"project/vllm-ascend/pull/6501)" +msgstr "" +"重构了教程文档结构。 [#6501](https://github.com/vllm-project/vllm-" +"ascend/pull/6501)" + +#: ../../source/user_guide/release_notes.md:218 +msgid "" +"Added npugraph_ex introduction documentation. [#6306](https://github.com" +"/vllm-project/vllm-ascend/pull/6306)" +msgstr "" +"新增 npugraph_ex 介绍文档。 [#6306](https://github.com/vllm-project/vllm-" +"ascend/pull/6306)" + +#: ../../source/user_guide/release_notes.md:222 +msgid "" +"**MTP in PD Fullgraph**: Fixed support for ALL D-Nodes in fullgraph when " +"running MTP in PD deployment. [#5472](https://github.com/vllm-project" +"/vllm-ascend/pull/5472)" +msgstr "" +"**PD 全图中的 MTP**:修复了在 PD 部署中运行 MTP 时,全图模式下对所有 D-Nodes 的支持问题。 [#5472](https://github.com/vllm-project/vllm-ascend/pull/5472)" + +#: ../../source/user_guide/release_notes.md:223 +msgid "" +"**DeepSeekV3.1 Accuracy**: Fixed DeepSeekV3.1 accuracy issue. " +"[#6805](https://github.com/vllm-project/vllm-ascend/pull/6805)" +msgstr "" +"**DeepSeekV3.1 准确性**:修复了 DeepSeekV3.1 的准确性问题。 [#6805](https://github.com/vllm-project/vllm-ascend/pull/6805)" + +#: ../../source/user_guide/release_notes.md:224 +msgid "" +"**EAGLE Refactor**: Routed MTP to EAGLE except for PCP/DCP+MTP cases. " +"[#6349](https://github.com/vllm-project/vllm-ascend/pull/6349)" +msgstr "" +"**EAGLE 重构**:将 MTP 路由至 EAGLE,PCP/DCP+MTP 情况除外。 [#6349](https://github.com/vllm-project/vllm-ascend/pull/6349)" + +#: ../../source/user_guide/release_notes.md:225 +msgid "" +"**Speculative Decoding Accuracy**: Fixed spec acceptance rate problem in " +"vLLM 0.15.0. [#6606](https://github.com/vllm-project/vllm-" +"ascend/pull/6606)" +msgstr "" +"**推测解码准确性**:修复了 vLLM 0.15.0 中的推测接受率问题。 [#6606](https://github.com/vllm-project/vllm-ascend/pull/6606)" + +#: ../../source/user_guide/release_notes.md:226 +msgid "" +"**PCP/DCP Accuracy**: Fixed accuracy issue in PCP/DCP with speculative " +"decoding. [#6491](https://github.com/vllm-project/vllm-ascend/pull/6491)" +msgstr "" +"**PCP/DCP 准确性**:修复了 PCP/DCP 结合推测解码时的准确性问题。 [#6491](https://github.com/vllm-project/vllm-ascend/pull/6491)" + +#: ../../source/user_guide/release_notes.md:227 +msgid "" +"**Dynamic EPLB**: Fixed ineffective dynamic EPLB bug and EPLB no longer " +"depends on a specified model. 
[#6653](https://github.com/vllm-project" +"/vllm-ascend/pull/6653) [#6528](https://github.com/vllm-project/vllm-" +"ascend/pull/6528)" +msgstr "" +"**动态 EPLB**:修复了动态 EPLB 无效的缺陷,且 EPLB 不再依赖特定模型。 [#6653](https://github.com/vllm-project/vllm-ascend/pull/6653) [#6528](https://github.com/vllm-project/vllm-ascend/pull/6528)" + +#: ../../source/user_guide/release_notes.md:228 +msgid "" +"**KV Pool Mooncake Backend**: Correctly initialized head_or_tp_rank for " +"mooncake backend. [#6498](https://github.com/vllm-project/vllm-" +"ascend/pull/6498)" +msgstr "" +"**KV 池 Mooncake 后端**:正确初始化了 mooncake 后端的 head_or_tp_rank。 [#6498](https://github.com/vllm-project/vllm-ascend/pull/6498)" + +#: ../../source/user_guide/release_notes.md:229 +msgid "" +"**Layerwise Connector Recompute Scheduler**: Layerwise connector now " +"supports recompute scheduler. [#5900](https://github.com/vllm-project" +"/vllm-ascend/pull/5900)" +msgstr "" +"**分层连接器重计算调度器**:分层连接器现在支持重计算调度器。 [#5900](https://github.com/vllm-project/vllm-ascend/pull/5900)" + +#: ../../source/user_guide/release_notes.md:230 +msgid "" +"**Memcache Pool**: Fixed service startup failure when memcache pool is " +"enabled. [#6229](https://github.com/vllm-project/vllm-ascend/pull/6229)" +msgstr "" +"**Memcache 池**:修复了启用 memcache 池时服务启动失败的问题。 [#6229](https://github.com/vllm-project/vllm-ascend/pull/6229)" + +#: ../../source/user_guide/release_notes.md:231 +msgid "" +"**AddRMSNormQuant**: Fixed AddRMSNormQuant not taking effect. " +"[#6620](https://github.com/vllm-project/vllm-ascend/pull/6620)" +msgstr "" +"**AddRMSNormQuant**:修复了 AddRMSNormQuant 未生效的问题。 [#6620](https://github.com/vllm-project/vllm-ascend/pull/6620)" + +#: ../../source/user_guide/release_notes.md:232 +msgid "" +"**Pooling Code**: Fixed pooling code issues and updated usage guide. " +"[#6126](https://github.com/vllm-project/vllm-ascend/pull/6126)" +msgstr "" +"**池化代码**:修复了池化代码问题并更新了使用指南。 [#6126](https://github.com/vllm-project/vllm-ascend/pull/6126)" + +#: ../../source/user_guide/release_notes.md:233 +msgid "" +"**Context Parallel**: Fixed and unified the PD request discrimination " +"logic. [#5939](https://github.com/vllm-project/vllm-ascend/pull/5939)" +msgstr "" +"**上下文并行**:修复并统一了 PD 请求判别逻辑。 [#5939](https://github.com/vllm-project/vllm-ascend/pull/5939)" + +#: ../../source/user_guide/release_notes.md:234 +msgid "" +"**npugraph_ex**: Fixed duplicate pattern issue and added extra check for " +"allreduce rmsnorm fusion pass. [#6513](https://github.com/vllm-project" +"/vllm-ascend/pull/6513) [#6430](https://github.com/vllm-project/vllm-" +"ascend/pull/6430)" +msgstr "" +"**npugraph_ex**:修复了重复模式问题,并为 allreduce rmsnorm 融合通道添加了额外检查。 [#6513](https://github.com/vllm-project/vllm-ascend/pull/6513) [#6430](https://github.com/vllm-project/vllm-ascend/pull/6430)" + +#: ../../source/user_guide/release_notes.md:235 +msgid "" +"**RecomputeScheduler**: Fixed incompatibility of RecomputeScheduler with " +"vLLM v0.14.1. [#6286](https://github.com/vllm-project/vllm-" +"ascend/pull/6286)" +msgstr "" +"**RecomputeScheduler**:修复了 RecomputeScheduler 与 vLLM v0.14.1 的兼容性问题。 [#6286](https://github.com/vllm-project/vllm-ascend/pull/6286)" + +#: ../../source/user_guide/release_notes.md:237 +msgid "v0.13.0 - 2026.02.06" +msgstr "v0.13.0 - 2026年02月06日" + +#: ../../source/user_guide/release_notes.md:239 +msgid "" +"This is the final release of v0.13.0 for vLLM Ascend. Please follow the " +"[official doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get " +"started." 
+msgstr "" +"这是 vLLM Ascend v0.13.0 的最终版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.13.0/)开始使用。" + +#: ../../source/user_guide/release_notes.md:243 +msgid "**Model Support**" +msgstr "**模型支持**" + +#: ../../source/user_guide/release_notes.md:245 +msgid "" +"**DeepSeek-R1 & DeepSeek-V3.2**: [Experimental]Performance optimizations," +" and async scheduling enhancements. [#3631](https://github.com/vllm-" +"project/vllm-ascend/pull/3631) [#3900](https://github.com/vllm-project" +"/vllm-ascend/pull/3900) [#3908](https://github.com/vllm-project/vllm-" +"ascend/pull/3908) [#4191](https://github.com/vllm-project/vllm-" +"ascend/pull/4191) [#4805](https://github.com/vllm-project/vllm-" +"ascend/pull/4805)" +msgstr "" +"**DeepSeek-R1 & DeepSeek-V3.2**:[实验性]性能优化和异步调度增强。 [#3631](https://github.com/vllm-project/vllm-ascend/pull/3631) [#3900](https://github.com/vllm-project/vllm-ascend/pull/3900) [#3908](https://github.com/vllm-project/vllm-ascend/pull/3908) [#4191](https://github.com/vllm-project/vllm-ascend/pull/4191) [#4805](https://github.com/vllm-project/vllm-ascend/pull/4805)" + +#: ../../source/user_guide/release_notes.md:246 +msgid "" +"**Qwen3-Next**: [Experimental]Full support for Qwen3-Next series " +"including 80B-A3B-Instruct with full graph mode, MTP, quantization " +"(W8A8), NZ optimization, and chunked prefill. Fixed multiple accuracy and" +" stability issues. [#3450](https://github.com/vllm-project/vllm-" +"ascend/pull/3450) [#3572](https://github.com/vllm-project/vllm-" +"ascend/pull/3572) [#3428](https://github.com/vllm-project/vllm-" +"ascend/pull/3428) [#3918](https://github.com/vllm-project/vllm-" +"ascend/pull/3918) [#4058](https://github.com/vllm-project/vllm-" +"ascend/pull/4058) [#4245](https://github.com/vllm-project/vllm-" +"ascend/pull/4245) [#4070](https://github.com/vllm-project/vllm-" +"ascend/pull/4070) [#4477](https://github.com/vllm-project/vllm-" +"ascend/pull/4477) [#4770](https://github.com/vllm-project/vllm-" +"ascend/pull/4770)" +msgstr "" +"**Qwen3-Next**:[实验性]全面支持 Qwen3-Next 系列模型,包括 80B-A3B-Instruct,支持全图模式、MTP、量化(W8A8)、NZ 优化和分块预填充。修复了多个准确性和稳定性问题。 [#3450](https://github.com/vllm-project/vllm-ascend/pull/3450) [#3572](https://github.com/vllm-project/vllm-ascend/pull/3572) [#3428](https://github.com/vllm-project/vllm-ascend/pull/3428) [#3918](https://github.com/vllm-project/vllm-ascend/pull/3918) [#4058](https://github.com/vllm-project/vllm-ascend/pull/4058) [#4245](https://github.com/vllm-project/vllm-ascend/pull/4245) [#4070](https://github.com/vllm-project/vllm-ascend/pull/4070) [#4477](https://github.com/vllm-project/vllm-ascend/pull/4477) [#4770](https://github.com/vllm-project/vllm-ascend/pull/4770)" + +#: ../../source/user_guide/release_notes.md:247 +msgid "" +"**InternVL**: Added support for InternVL models with comprehensive e2e " +"tests and accuracy evaluation. [#3796](https://github.com/vllm-project" +"/vllm-ascend/pull/3796) [#3964](https://github.com/vllm-project/vllm-" +"ascend/pull/3964)" +msgstr "" +"**InternVL**:新增对 InternVL 模型的支持,包含全面的端到端测试和准确性评估。 [#3796](https://github.com/vllm-project/vllm-ascend/pull/3796) [#3964](https://github.com/vllm-project/vllm-ascend/pull/3964)" + +#: ../../source/user_guide/release_notes.md:248 +msgid "" +"**LongCat-Flash**: [Experimental]Added support for LongCat-Flash model. 
" +"[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)" +msgstr "" +"**LongCat-Flash**:[实验性]新增对 LongCat-Flash 模型的支持。 [#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)" + +#: ../../source/user_guide/release_notes.md:249 +msgid "" +"**minimax_m2**: [Experimental]Added support for minimax_m2 model. " +"[#5624](https://github.com/vllm-project/vllm-ascend/pull/5624)" +msgstr "" +"**minimax_m2**:[实验性]新增对 minimax_m2 模型的支持。 [#5624](https://github.com/vllm-project/vllm-ascend/pull/5624)" + +#: ../../source/user_guide/release_notes.md:250 +msgid "" +"**Whisper & Cross-Attention**: [Experimental]Added support for cross-" +"attention and Whisper models. [#5592](https://github.com/vllm-project" +"/vllm-ascend/pull/5592)" +msgstr "" +"**Whisper 与交叉注意力**:[实验性]新增对交叉注意力和 Whisper 模型的支持。 [#5592](https://github.com/vllm-project/vllm-ascend/pull/5592)" + +#: ../../source/user_guide/release_notes.md:251 +msgid "" +"**Pooling Models**: [Experimental]Added support for pooling models with " +"PCP adaptation and fixed multiple pooling-related bugs. " +"[#3122](https://github.com/vllm-project/vllm-ascend/pull/3122) " +"[#4143](https://github.com/vllm-project/vllm-ascend/pull/4143) " +"[#6056](https://github.com/vllm-project/vllm-ascend/pull/6056) " +"[#6057](https://github.com/vllm-project/vllm-ascend/pull/6057) " +"[#6146](https://github.com/vllm-project/vllm-ascend/pull/6146)" +msgstr "" +"**池化模型**:[实验性]新增对池化模型的支持,包含 PCP 适配,并修复了多个与池化相关的缺陷。 [#3122](https://github.com/vllm-project/vllm-ascend/pull/3122) [#4143](https://github.com/vllm-project/vllm-ascend/pull/4143) [#6056](https://github.com/vllm-project/vllm-ascend/pull/6056) [#6057](https://github.com/vllm-project/vllm-ascend/pull/6057) [#6146](https://github.com/vllm-project/vllm-ascend/pull/6146)" + +#: ../../source/user_guide/release_notes.md:252 +msgid "" +"**PanguUltraMoE**: [Experimental]Added support for PanguUltraMoE model. " +"[#4615](https://github.com/vllm-project/vllm-ascend/pull/4615)" +msgstr "" +"**PanguUltraMoE**:[实验性]新增对 PanguUltraMoE 模型的支持。 [#4615](https://github.com/vllm-project/vllm-ascend/pull/4615)" + +#: ../../source/user_guide/release_notes.md:254 +msgid "**Core Features**" +msgstr "**核心功能**" + +#: ../../source/user_guide/release_notes.md:256 +msgid "" +"**Context Parallel (PCP/DCP)**: [Experimental] Added comprehensive " +"support for Prefill Context Parallel (PCP) and Decode Context Parallel " +"(DCP) with ACLGraph, MTP, chunked prefill, MLAPO, and Mooncake connector " +"integration. This is an experimental feature - feedback welcome. 
" +"[#3260](https://github.com/vllm-project/vllm-ascend/pull/3260) " +"[#3731](https://github.com/vllm-project/vllm-ascend/pull/3731) " +"[#3801](https://github.com/vllm-project/vllm-ascend/pull/3801) " +"[#3980](https://github.com/vllm-project/vllm-ascend/pull/3980) " +"[#4066](https://github.com/vllm-project/vllm-ascend/pull/4066) " +"[#4098](https://github.com/vllm-project/vllm-ascend/pull/4098) " +"[#4183](https://github.com/vllm-project/vllm-ascend/pull/4183) " +"[#5672](https://github.com/vllm-project/vllm-ascend/pull/5672)" +msgstr "" +"**上下文并行 (PCP/DCP)**: [实验性] 新增对预填充上下文并行 (PCP) 和解码上下文并行 (DCP) 的全面支持,集成了 ACLGraph、MTP、分块预填充、MLAPO 和 Mooncake 连接器。此为实验性功能,欢迎反馈。" +"[#3260](https://github.com/vllm-project/vllm-ascend/pull/3260) " +"[#3731](https://github.com/vllm-project/vllm-ascend/pull/3731) " +"[#3801](https://github.com/vllm-project/vllm-ascend/pull/3801) " +"[#3980](https://github.com/vllm-project/vllm-ascend/pull/3980) " +"[#4066](https://github.com/vllm-project/vllm-ascend/pull/4066) " +"[#4098](https://github.com/vllm-project/vllm-ascend/pull/4098) " +"[#4183](https://github.com/vllm-project/vllm-ascend/pull/4183) " +"[#5672](https://github.com/vllm-project/vllm-ascend/pull/5672)" + +#: ../../source/user_guide/release_notes.md:257 +msgid "" +"**Full Graph Mode (ACLGraph)**: [Experimental]Enhanced full graph mode " +"with GQA support, memory optimizations, unified logic between ACLGraph " +"and Torchair, and improved stability. [#3560](https://github.com/vllm-" +"project/vllm-ascend/pull/3560) [#3970](https://github.com/vllm-project" +"/vllm-ascend/pull/3970) [#3812](https://github.com/vllm-project/vllm-" +"ascend/pull/3812) [#3879](https://github.com/vllm-project/vllm-" +"ascend/pull/3879) [#3888](https://github.com/vllm-project/vllm-" +"ascend/pull/3888) [#3894](https://github.com/vllm-project/vllm-" +"ascend/pull/3894) [#5118](https://github.com/vllm-project/vllm-" +"ascend/pull/5118)" +msgstr "" +"**全图模式 (ACLGraph)**: [实验性] 增强了全图模式,支持 GQA,进行了内存优化,统一了 ACLGraph 与 Torchair 之间的逻辑,并提升了稳定性。" +"[#3560](https://github.com/vllm-project/vllm-ascend/pull/3560) " +"[#3970](https://github.com/vllm-project/vllm-ascend/pull/3970) " +"[#3812](https://github.com/vllm-project/vllm-ascend/pull/3812) " +"[#3879](https://github.com/vllm-project/vllm-ascend/pull/3879) " +"[#3888](https://github.com/vllm-project/vllm-ascend/pull/3888) " +"[#3894](https://github.com/vllm-project/vllm-ascend/pull/3894) " +"[#5118](https://github.com/vllm-project/vllm-ascend/pull/5118)" + +#: ../../source/user_guide/release_notes.md:258 +msgid "" +"**Multi-Token Prediction (MTP)**: Significantly improved MTP support with" +" chunked prefill for DeepSeek, quantization support, full graph mode, " +"PCP/DCP integration, and async scheduling. MTP now works in most cases " +"and is recommended for use. 
[#2711](https://github.com/vllm-project/vllm-" +"ascend/pull/2711) [#2713](https://github.com/vllm-project/vllm-" +"ascend/pull/2713) [#3620](https://github.com/vllm-project/vllm-" +"ascend/pull/3620) [#3845](https://github.com/vllm-project/vllm-" +"ascend/pull/3845) [#3910](https://github.com/vllm-project/vllm-" +"ascend/pull/3910) [#3915](https://github.com/vllm-project/vllm-" +"ascend/pull/3915) [#4102](https://github.com/vllm-project/vllm-" +"ascend/pull/4102) [#4111](https://github.com/vllm-project/vllm-" +"ascend/pull/4111) [#4770](https://github.com/vllm-project/vllm-" +"ascend/pull/4770) [#5477](https://github.com/vllm-project/vllm-" +"ascend/pull/5477)" +msgstr "" +"**多令牌预测 (MTP)**: 显著改进了 MTP 支持,包括针对 DeepSeek 的分块预填充、量化支持、全图模式、PCP/DCP 集成和异步调度。MTP 现在在大多数情况下可用,推荐使用。" +"[#2711](https://github.com/vllm-project/vllm-ascend/pull/2711) " +"[#2713](https://github.com/vllm-project/vllm-ascend/pull/2713) " +"[#3620](https://github.com/vllm-project/vllm-ascend/pull/3620) " +"[#3845](https://github.com/vllm-project/vllm-ascend/pull/3845) " +"[#3910](https://github.com/vllm-project/vllm-ascend/pull/3910) " +"[#3915](https://github.com/vllm-project/vllm-ascend/pull/3915) " +"[#4102](https://github.com/vllm-project/vllm-ascend/pull/4102) " +"[#4111](https://github.com/vllm-project/vllm-ascend/pull/4111) " +"[#4770](https://github.com/vllm-project/vllm-ascend/pull/4770) " +"[#5477](https://github.com/vllm-project/vllm-ascend/pull/5477)" + +#: ../../source/user_guide/release_notes.md:259 +msgid "" +"**Eagle Speculative Decoding**: Eagle spec decode now works with full " +"graph mode and is more stable. [#5118](https://github.com/vllm-project" +"/vllm-ascend/pull/5118) [#4893](https://github.com/vllm-project/vllm-" +"ascend/pull/4893) [#5804](https://github.com/vllm-project/vllm-" +"ascend/pull/5804)" +msgstr "" +"**Eagle 推测解码**: Eagle 推测解码现在可与全图模式协同工作,且更加稳定。" +"[#5118](https://github.com/vllm-project/vllm-ascend/pull/5118) " +"[#4893](https://github.com/vllm-project/vllm-ascend/pull/4893) " +"[#5804](https://github.com/vllm-project/vllm-ascend/pull/5804)" + +#: ../../source/user_guide/release_notes.md:260 +msgid "" +"**PD Disaggregation**: Set ADXL engine as default backend for " +"disaggregated prefill with improved performance and stability. Added " +"support for KV NZ feature for DeepSeek decode node. " +"[#3761](https://github.com/vllm-project/vllm-ascend/pull/3761) " +"[#3950](https://github.com/vllm-project/vllm-ascend/pull/3950) " +"[#5008](https://github.com/vllm-project/vllm-ascend/pull/5008) " +"[#3072](https://github.com/vllm-project/vllm-ascend/pull/3072)" +msgstr "" +"**PD 解耦**: 将 ADXL 引擎设置为解耦预填充的默认后端,提升了性能和稳定性。为 DeepSeek 解码节点增加了 KV NZ 功能支持。" +"[#3761](https://github.com/vllm-project/vllm-ascend/pull/3761) " +"[#3950](https://github.com/vllm-project/vllm-ascend/pull/3950) " +"[#5008](https://github.com/vllm-project/vllm-ascend/pull/5008) " +"[#3072](https://github.com/vllm-project/vllm-ascend/pull/3072)" + +#: ../../source/user_guide/release_notes.md:261 +msgid "" +"**KV Pool & Mooncake**: Enhanced KV pool with Mooncake connector support " +"for PCP/DCP, multiple input suffixes, and improved performance of " +"Layerwise Connector. 
[#3690](https://github.com/vllm-project/vllm-" +"ascend/pull/3690) [#3752](https://github.com/vllm-project/vllm-" +"ascend/pull/3752) [#3849](https://github.com/vllm-project/vllm-" +"ascend/pull/3849) [#4183](https://github.com/vllm-project/vllm-" +"ascend/pull/4183) [#5303](https://github.com/vllm-project/vllm-" +"ascend/pull/5303)" +msgstr "" +"**KV 池 & Mooncake**: 增强了 KV 池,支持用于 PCP/DCP 的 Mooncake 连接器、多输入后缀,并提升了 Layerwise 连接器的性能。" +"[#3690](https://github.com/vllm-project/vllm-ascend/pull/3690) " +"[#3752](https://github.com/vllm-project/vllm-ascend/pull/3752) " +"[#3849](https://github.com/vllm-project/vllm-ascend/pull/3849) " +"[#4183](https://github.com/vllm-project/vllm-ascend/pull/4183) " +"[#5303](https://github.com/vllm-project/vllm-ascend/pull/5303)" + +#: ../../source/user_guide/release_notes.md:262 +msgid "" +"**EPLB (Elastic Prefill Load Balancing)**: [Experimental]EPLB is now more" +" stable with many bug fixes. Mix placement now works. " +"[#6086](https://github.com/vllm-project/vllm-ascend/pull/6086)" +msgstr "" +"**EPLB (弹性预填充负载均衡)**: [实验性] EPLB 现在更加稳定,修复了许多错误。混合放置现已可用。" +"[#6086](https://github.com/vllm-project/vllm-ascend/pull/6086)" + +#: ../../source/user_guide/release_notes.md:263 +msgid "" +"**Full Decode Only Mode**: Added support for Qwen3-Next and DeepSeekv32 " +"in full_decode_only mode with bug fixes. [#3949](https://github.com/vllm-" +"project/vllm-ascend/pull/3949) [#3986](https://github.com/vllm-project" +"/vllm-ascend/pull/3986) [#3763](https://github.com/vllm-project/vllm-" +"ascend/pull/3763)" +msgstr "" +"**纯解码模式**: 在 full_decode_only 模式下增加了对 Qwen3-Next 和 DeepSeekv32 的支持,并修复了相关错误。" +"[#3949](https://github.com/vllm-project/vllm-ascend/pull/3949) " +"[#3986](https://github.com/vllm-project/vllm-ascend/pull/3986) " +"[#3763](https://github.com/vllm-project/vllm-ascend/pull/3763)" + +#: ../../source/user_guide/release_notes.md:264 +msgid "" +"**Model Runner V2**: [Experimental]Added basic support for Model Runner " +"V2, the next generation of vLLM. It will be used by default in future " +"releases. [#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" +msgstr "" +"**Model Runner V2**: [实验性] 新增对下一代 vLLM 的 Model Runner V2 的基本支持。它将在未来的版本中默认启用。" +"[#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" + +#: ../../source/user_guide/release_notes.md:268 +msgid "" +"**W8A16 Quantization**: [Experimental]Added new W8A16 quantization method" +" support. [#4541](https://github.com/vllm-project/vllm-ascend/pull/4541)" +msgstr "" +"**W8A16 量化**: [实验性] 新增对 W8A16 量化方法的支持。" +"[#4541](https://github.com/vllm-project/vllm-ascend/pull/4541)" + +#: ../../source/user_guide/release_notes.md:269 +msgid "" +"**UCM Connector**: [Experimental]Added UCMConnector for KV Cache " +"Offloading. [#4411](https://github.com/vllm-project/vllm-" +"ascend/pull/4411)" +msgstr "" +"**UCM 连接器**: [实验性] 新增用于 KV 缓存卸载的 UCMConnector。" +"[#4411](https://github.com/vllm-project/vllm-ascend/pull/4411)" + +#: ../../source/user_guide/release_notes.md:270 +msgid "" +"**Batch Invariant**: [Experimental]Implemented basic framework for batch " +"invariant feature. [#5517](https://github.com/vllm-project/vllm-" +"ascend/pull/5517)" +msgstr "" +"**批次不变性**: [实验性] 实现了批次不变性功能的基本框架。" +"[#5517](https://github.com/vllm-project/vllm-ascend/pull/5517)" + +#: ../../source/user_guide/release_notes.md:271 +msgid "" +"**Sampling**: Enhanced sampling with async_scheduler and " +"disable_padded_drafter_batch support in Eagle. 
[#4893](https://github.com" +"/vllm-project/vllm-ascend/pull/4893)" +msgstr "" +"**采样**: 增强了采样功能,在 Eagle 中支持 async_scheduler 和 disable_padded_drafter_batch。" +"[#4893](https://github.com/vllm-project/vllm-ascend/pull/4893)" + +#: ../../source/user_guide/release_notes.md:275 +msgid "**Custom Operators**: Added multiple custom operators including:" +msgstr "**自定义算子**: 新增了多个自定义算子,包括:" + +#: ../../source/user_guide/release_notes.md:276 +msgid "" +"Fused matmul/reduce-scatter kernel [#3693](https://github.com/vllm-" +"project/vllm-ascend/pull/3693)" +msgstr "" +"融合的 matmul/reduce-scatter 内核 " +"[#3693](https://github.com/vllm-project/vllm-ascend/pull/3693)" + +#: ../../source/user_guide/release_notes.md:277 +msgid "" +"mrope fusion op [#3708](https://github.com/vllm-project/vllm-" +"ascend/pull/3708)" +msgstr "" +"mrope 融合算子 " +"[#3708](https://github.com/vllm-project/vllm-ascend/pull/3708)" + +#: ../../source/user_guide/release_notes.md:278 +msgid "" +"Triton chunk_gated_delta_rule ops for Qwen3-Next " +"[#4070](https://github.com/vllm-project/vllm-ascend/pull/4070)" +msgstr "" +"用于 Qwen3-Next 的 Triton chunk_gated_delta_rule 算子 " +"[#4070](https://github.com/vllm-project/vllm-ascend/pull/4070)" + +#: ../../source/user_guide/release_notes.md:279 +msgid "" +"l2norm triton kernel [#4595](https://github.com/vllm-project/vllm-" +"ascend/pull/4595)" +msgstr "" +"l2norm triton 内核 " +"[#4595](https://github.com/vllm-project/vllm-ascend/pull/4595)" + +#: ../../source/user_guide/release_notes.md:280 +msgid "RejectSampler, MoeInitRoutingCustom, DispatchFFNCombine custom ops" +msgstr "RejectSampler、MoeInitRoutingCustom、DispatchFFNCombine 自定义算子" + +#: ../../source/user_guide/release_notes.md:281 +msgid "" +"**Operator Fusion**: Added AddRmsnormQuant fusion pattern with SP support" +" and inductor fusion for quantization. [#5077](https://github.com/vllm-" +"project/vllm-ascend/pull/5077) [#4168](https://github.com/vllm-project" +"/vllm-ascend/pull/4168)" +msgstr "" +"**算子融合**: 新增了支持 SP 的 AddRmsnormQuant 融合模式以及用于量化的 inductor 融合。" +"[#5077](https://github.com/vllm-project/vllm-ascend/pull/5077) " +"[#4168](https://github.com/vllm-project/vllm-ascend/pull/4168)" + +#: ../../source/user_guide/release_notes.md:282 +msgid "" +"**MLA/SFA**: Refactored SFA into MLA architecture for better " +"maintainability. [#3769](https://github.com/vllm-project/vllm-" +"ascend/pull/3769)" +msgstr "" +"**MLA/SFA**: 将 SFA 重构为 MLA 架构,以提高可维护性。" +"[#3769](https://github.com/vllm-project/vllm-ascend/pull/3769)" + +#: ../../source/user_guide/release_notes.md:283 +msgid "" +"**FIA Operator**: Adapted to npu_fused_infer_attention_score with flash " +"decoding function. To optimize performance in small batch size scenarios," +" this attention operator is now available. Please refer to item 22 in " +"[FAQs](https://docs.vllm.ai/projects/ascend/en/v0.13.0/faqs.html) to " +"enable it. [#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" +msgstr "" +"**FIA 算子**: 适配了具有 flash decoding 功能的 npu_fused_infer_attention_score。为优化小批量场景下的性能,此注意力算子现已可用。请参阅 [常见问题](https://docs.vllm.ai/projects/ascend/en/v0.13.0/faqs.html) 中的第 22 项以启用它。" +"[#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" + +#: ../../source/user_guide/release_notes.md:284 +msgid "" +"**CANN 8.5 Support**: Removed CP redundant variables after FIA operator " +"enables for CANN 8.5. 
[#6039](https://github.com/vllm-project/vllm-" +"ascend/pull/6039)" +msgstr "" +"**CANN 8.5 支持**: 在启用 FIA 算子后,为 CANN 8.5 移除了 CP 冗余变量。" +"[#6039](https://github.com/vllm-project/vllm-ascend/pull/6039)" + +#: ../../source/user_guide/release_notes.md:288 +msgid "" +"Many custom ops and triton kernels were added in this release to speed up" +" model performance:" +msgstr "本版本新增了许多自定义算子和 triton 内核以加速模型性能:" + +#: ../../source/user_guide/release_notes.md:290 +msgid "" +"**DeepSeek Performance**: [Experimental]Improved performance for DeepSeek" +" V3.2 by eliminating HD synchronization in async scheduling and " +"optimizing memory usage for MTP. [#4805](https://github.com/vllm-project" +"/vllm-ascend/pull/4805) [#2713](https://github.com/vllm-project/vllm-" +"ascend/pull/2713)" +msgstr "" +"**DeepSeek 性能**: [实验性] 通过消除异步调度中的 HD 同步以及优化 MTP 的内存使用,提升了 DeepSeek V3.2 的性能。" +"[#4805](https://github.com/vllm-project/vllm-ascend/pull/4805) " +"[#2713](https://github.com/vllm-project/vllm-ascend/pull/2713)" + +#: ../../source/user_guide/release_notes.md:291 +msgid "" +"**Qwen3-Next Performance**: [Experimental]Improved performance with " +"Triton ops and optimizations. [#5664](https://github.com/vllm-project" +"/vllm-ascend/pull/5664) [#5984](https://github.com/vllm-project/vllm-" +"ascend/pull/5984) [#5765](https://github.com/vllm-project/vllm-" +"ascend/pull/5765)" +msgstr "" +"**Qwen3-Next 性能**: [实验性] 通过 Triton 算子和优化提升了性能。" +"[#5664](https://github.com/vllm-project/vllm-ascend/pull/5664) " +"[#5984](https://github.com/vllm-project/vllm-ascend/pull/5984) " +"[#5765](https://github.com/vllm-project/vllm-ascend/pull/5765)" + +#: ../../source/user_guide/release_notes.md:292 +msgid "" +"**FlashComm**: Enhanced FlashComm v2 optimization with o_shared linear " +"and communication domain fixes. [#3232](https://github.com/vllm-project" +"/vllm-ascend/pull/3232) [#4188](https://github.com/vllm-project/vllm-" +"ascend/pull/4188) [#4458](https://github.com/vllm-project/vllm-" +"ascend/pull/4458) [#5848](https://github.com/vllm-project/vllm-" +"ascend/pull/5848)" +msgstr "" +"**FlashComm**: 增强了 FlashComm v2 优化,包括 o_shared linear 和通信域修复。" +"[#3232](https://github.com/vllm-project/vllm-ascend/pull/3232) " +"[#4188](https://github.com/vllm-project/vllm-ascend/pull/4188) " +"[#4458](https://github.com/vllm-project/vllm-ascend/pull/4458) " +"[#5848](https://github.com/vllm-project/vllm-ascend/pull/5848)" + +#: ../../source/user_guide/release_notes.md:293 +msgid "" +"**MoE Optimization**: Optimized all2allv for MoE models and enhanced all-" +"reduce skipping logic. [#3738](https://github.com/vllm-project/vllm-" +"ascend/pull/3738) [#5329](https://github.com/vllm-project/vllm-" +"ascend/pull/5329)" +msgstr "" +"**MoE 优化**: 针对 MoE 模型优化了 all2allv,并增强了 all-reduce 跳过逻辑。" +"[#3738](https://github.com/vllm-project/vllm-ascend/pull/3738) " +"[#5329](https://github.com/vllm-project/vllm-ascend/pull/5329)" + +#: ../../source/user_guide/release_notes.md:294 +msgid "" +"**Attention Optimization**: Moved attention update stream out of loop, " +"converted BSND to TND format for long sequence optimization, and removed " +"transpose step after attention switching to transpose_batchmatmul. 
" +"[#3848](https://github.com/vllm-project/vllm-ascend/pull/3848) " +"[#3778](https://github.com/vllm-project/vllm-ascend/pull/3778) " +"[#5390](https://github.com/vllm-project/vllm-ascend/pull/5390)" +msgstr "" +"**注意力优化**: 将注意力更新流移出循环,为长序列优化将 BSND 格式转换为 TND 格式,并在注意力切换到 transpose_batchmatmul 后移除了转置步骤。" +"[#3848](https://github.com/vllm-project/vllm-ascend/pull/3848) " +"[#3778](https://github.com/vllm-project/vllm-ascend/pull/3778) " +"[#5390](https://github.com/vllm-project/vllm-ascend/pull/5390)" + +#: ../../source/user_guide/release_notes.md:295 +msgid "" +"**Quantization Performance**: Moved quantization before allgather in " +"Allgather EP. [#3420](https://github.com/vllm-project/vllm-" +"ascend/pull/3420)" +msgstr "" +"**量化性能**: 在 Allgather EP 中将量化操作移至 allgather 之前。" +"[#3420](https://github.com/vllm-project/vllm-ascend/pull/3420)" + +#: ../../source/user_guide/release_notes.md:296 +msgid "" +"**Layerwise Connector**: [Experimental]Improved performance of Layerwise " +"Connector. [#5303](https://github.com/vllm-project/vllm-ascend/pull/5303)" +msgstr "" +"**层间连接器**:[实验性] 提升了层间连接器的性能。[#5303](https://github.com/vllm-project/vllm-" +"ascend/pull/5303)" + +#: ../../source/user_guide/release_notes.md:297 +msgid "" +"**Prefix Cache**: Improved performance of prefix cache features. " +"[#4022](https://github.com/vllm-project/vllm-ascend/pull/4022)" +msgstr "" +"**前缀缓存**:提升了前缀缓存功能的性能。[#4022](https://github.com/vllm-project/vllm-" +"ascend/pull/4022)" + +#: ../../source/user_guide/release_notes.md:298 +msgid "" +"**Async Scheduling**: Fixed async copy and eliminated hangs in async " +"scheduling. [#4113](https://github.com/vllm-project/vllm-" +"ascend/pull/4113) [#4233](https://github.com/vllm-project/vllm-" +"ascend/pull/4233)" +msgstr "" +"**异步调度**:修复了异步复制问题,并消除了异步调度中的挂起现象。[#4113](https://github.com/vllm-project/vllm-" +"ascend/pull/4113) [#4233](https://github.com/vllm-project/vllm-" +"ascend/pull/4233)" + +#: ../../source/user_guide/release_notes.md:299 +msgid "" +"**Memory Operations**: Removed redundant D2H operations and deleted " +"redundant operations in model_runner. [#4063](https://github.com/vllm-" +"project/vllm-ascend/pull/4063) [#3677](https://github.com/vllm-project" +"/vllm-ascend/pull/3677)" +msgstr "" +"**内存操作**:移除了冗余的 D2H 操作,并删除了 model_runner 中的冗余操作。[#4063](https://github.com/vllm-" +"project/vllm-ascend/pull/4063) [#3677](https://github.com/vllm-project/vllm-" +"ascend/pull/3677)" + +#: ../../source/user_guide/release_notes.md:300 +msgid "" +"**Rope Embedding**: Optimized rope embedding with triton kernel for huge " +"performance gain. [#5918](https://github.com/vllm-project/vllm-" +"ascend/pull/5918)" +msgstr "" +"**Rope 嵌入**:使用 Triton 内核优化了 rope embedding,带来了巨大的性能提升。[#5918](https://github.com/vllm-" +"project/vllm-ascend/pull/5918)" + +#: ../../source/user_guide/release_notes.md:301 +msgid "" +"**Sampling**: Added support for advanced apply_top_k_top_p without top_k " +"constraint. [#6098](https://github.com/vllm-project/vllm-" +"ascend/pull/6098)" +msgstr "" +"**采样**:新增支持无 top_k 约束的高级 apply_top_k_top_p 功能。[#6098](https://github.com/vllm-" +"project/vllm-ascend/pull/6098)" + +#: ../../source/user_guide/release_notes.md:302 +msgid "" +"**Multimodal**: Parallelized Q/K/V padding in AscendMMEncoderAttention " +"for better performance. 
[#6204](https://github.com/vllm-project/vllm-" +"ascend/pull/6204)" +msgstr "" +"**多模态**:在 AscendMMEncoderAttention 中并行化 Q/K/V 填充以提升性能。[#6204](https://github.com/vllm-" +"project/vllm-ascend/pull/6204)" + +#: ../../source/user_guide/release_notes.md:306 +msgid "" +"**CANN**: Upgraded to 8.5.0 [#6112](https://github.com/vllm-project/vllm-" +"ascend/pull/6112)" +msgstr "" +"**CANN**:升级至 8.5.0 [#6112](https://github.com/vllm-project/vllm-" +"ascend/pull/6112)" + +#: ../../source/user_guide/release_notes.md:307 +msgid "" +"**torch-npu**: Upgraded to 2.8.0.post2. It's installed in the docker " +"container by default." +msgstr "" +"**torch-npu**:升级至 2.8.0.post2。默认已在 Docker 容器中安装。" + +#: ../../source/user_guide/release_notes.md:308 +msgid "" +"**triton-ascend**: Upgraded to 3.2.0 [#6105](https://github.com/vllm-" +"project/vllm-ascend/pull/6105)" +msgstr "" +"**triton-ascend**:升级至 3.2.0 [#6105](https://github.com/vllm-project/vllm-" +"ascend/pull/6105)" + +#: ../../source/user_guide/release_notes.md:309 +msgid "" +"**vLLM**: Upgraded to 0.13.0 and dropped 0.12.0 support. " +"[#5146](https://github.com/vllm-project/vllm-ascend/pull/5146)" +msgstr "" +"**vLLM**:升级至 0.13.0 并停止支持 0.12.0。[#5146](https://github.com/vllm-project/vllm-" +"ascend/pull/5146)" + +#: ../../source/user_guide/release_notes.md:310 +msgid "" +"**Transformers**: Upgraded to >= 4.57.4 [#5250](https://github.com/vllm-" +"project/vllm-ascend/pull/5250)" +msgstr "" +"**Transformers**:升级至 >= 4.57.4 [#5250](https://github.com/vllm-project/vllm-" +"ascend/pull/5250)" + +#: ../../source/user_guide/release_notes.md:314 +msgid "" +"**CPUOffloadingConnector** is deprecated. We'll remove it in the next " +"release. It'll be replaced by CPUOffload feature from vLLM in the future." +msgstr "" +"**CPUOffloadingConnector** 已弃用。我们将在下一个版本中移除它。未来将由 vLLM 的 CPUOffload 功能替代。" + +#: ../../source/user_guide/release_notes.md:315 +msgid "" +"**ProfileExecuteDuration** " +"[feature](https://docs.vllm.ai/projects/ascend/en/v0.13.0/developer_guide/performance_and_debug/profile_execute_duration.html)" +" is deprecated." +msgstr "" +"**ProfileExecuteDuration** " +"[功能](https://docs.vllm.ai/projects/ascend/en/v0.13.0/developer_guide/performance_and_debug/profile_execute_duration.html) 已弃用。" + +#: ../../source/user_guide/release_notes.md:316 +msgid "" +"**Ascend Scheduler** has been dropped. [#4623](https://github.com/vllm-" +"project/vllm-ascend/pull/4623)" +msgstr "" +"**Ascend 调度器** 已被移除。[#4623](https://github.com/vllm-project/vllm-" +"ascend/pull/4623)" + +#: ../../source/user_guide/release_notes.md:317 +msgid "" +"**Torchair** has been dropped. [#4814](https://github.com/vllm-project" +"/vllm-ascend/pull/4814)" +msgstr "" +"**Torchair** 已被移除。[#4814](https://github.com/vllm-project/vllm-" +"ascend/pull/4814)" + +#: ../../source/user_guide/release_notes.md:318 +msgid "" +"**VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE** is removed and " +"`VLLM_ASCEND_ENABLE_PREFETCH_MLP` is recommended to replace as they were " +"always enabled together. [#5272](https://github.com/vllm-project/vllm-" +"ascend/pull/5272)" +msgstr "" +"**VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE** 已被移除,建议使用 `VLLM_ASCEND_ENABLE_PREFETCH_MLP` 替代,因为它们之前总是一起启用。[#5272](https://github.com/vllm-project/vllm-ascend/pull/5272)" + +#: ../../source/user_guide/release_notes.md:319 +msgid "" +"**VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP** is dropped now. 
" +"[#5270](https://github.com/vllm-project/vllm-ascend/pull/5270)" +msgstr "" +"**VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP** 现已被移除。[#5270](https://github.com/vllm-project/vllm-ascend/pull/5270)" + +#: ../../source/user_guide/release_notes.md:320 +msgid "" +"**VLLM_ASCEND_ENABLE_NZ** is disabled for float weight case, since we " +"noticed that the performance is not good in some float cases. Feel free " +"to set it to 2 if you make sure it works for your case. " +"[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" +msgstr "" +"对于浮点权重的情况,**VLLM_ASCEND_ENABLE_NZ** 已被禁用,因为我们注意到在某些浮点场景下性能不佳。如果您确定它适用于您的情况,可以将其设置为 2。[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" + +#: ../../source/user_guide/release_notes.md:321 +msgid "" +"**chunked_prefill_for_mla** in `additional_config` is dropped now. " +"[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" +msgstr "" +"`additional_config` 中的 **chunked_prefill_for_mla** 现已被移除。[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" + +#: ../../source/user_guide/release_notes.md:322 +msgid "" +"**dump_config** in `additional_config` is renamed to `dump_config_path` " +"and the type is changed from `dict` to `string`. " +"[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" +msgstr "" +"`additional_config` 中的 **dump_config** 已重命名为 `dump_config_path`,类型也从 `dict` 更改为 `string`。[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" + +#: ../../source/user_guide/release_notes.md:323 +msgid "" +"**--task parameter** for embedding models is deprecated. " +"[#5257](https://github.com/vllm-project/vllm-ascend/pull/5257)" +msgstr "" +"用于嵌入模型的 **--task 参数** 已弃用。[#5257](https://github.com/vllm-project/vllm-ascend/pull/5257)" + +#: ../../source/user_guide/release_notes.md:324 +msgid "" +"**The value of VLLM_ASCEND_ENABLE_MLAPO** env will be set to True by " +"default in the next release. It'll be enabled in decode node by default. " +"Please note that this feature will cost more memory. If you are memory " +"sensitive, please set it to False." +msgstr "" +"**VLLM_ASCEND_ENABLE_MLAPO** 环境变量的值将在下一个版本中默认设置为 True。它将在解码节点默认启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。" + +#: ../../source/user_guide/release_notes.md:328 +msgid "" +"Added comprehensive developer guides for ACLGraph, MTP, KV Pool, EPLB, " +"and PD disaggregation features" +msgstr "新增了关于 ACLGraph、MTP、KV 池、EPLB 和 PD 解耦功能的全面开发者指南" + +#: ../../source/user_guide/release_notes.md:329 +msgid "" +"Added tutorials for multiple models including DeepSeek-V3.2-Exp, " +"Qwen3-Next, and various multimodal models" +msgstr "新增了多个模型的教程,包括 DeepSeek-V3.2-Exp、Qwen3-Next 以及各种多模态模型" + +#: ../../source/user_guide/release_notes.md:330 +msgid "Updated FAQ and configuration documentation" +msgstr "更新了常见问题解答和配置文档" + +#: ../../source/user_guide/release_notes.md:334 +msgid "" +"**OOM Fix**: OOM error on VL models is fixed now. We're keeping observing" +" it. If you hit OOM problem again, please submit an issue. " +"[#5136](https://github.com/vllm-project/vllm-ascend/pull/5136)" +msgstr "" +"**OOM 修复**:VL 模型上的 OOM 错误现已修复。我们将持续观察。如果您再次遇到 OOM 问题,请提交 issue。[#5136](https://github.com/vllm-project/vllm-ascend/pull/5136)" + +#: ../../source/user_guide/release_notes.md:335 +msgid "" +"**Qwen3-Next-MTP Accuracy**: Fixed an accuracy bug of Qwen3-Next-MTP when" +" batched inferring. 
[#4932](https://github.com/vllm-project/vllm-" +"ascend/pull/4932)" +msgstr "" +"**Qwen3-Next-MTP 准确性**:修复了 Qwen3-Next-MTP 在批量推理时的准确性错误。[#4932](https://github.com/vllm-project/vllm-ascend/pull/4932)" + +#: ../../source/user_guide/release_notes.md:336 +msgid "" +"**ZMQ Bug Fix**: Fixed zmq send/receive failed bug. " +"[#5503](https://github.com/vllm-project/vllm-ascend/pull/5503)" +msgstr "" +"**ZMQ 错误修复**:修复了 zmq 发送/接收失败的 bug。[#5503](https://github.com/vllm-project/vllm-ascend/pull/5503)" + +#: ../../source/user_guide/release_notes.md:337 +msgid "" +"**Weight Transpose**: Fixed weight transpose in RL scenarios. " +"[#5567](https://github.com/vllm-project/vllm-ascend/pull/5567)" +msgstr "" +"**权重转置**:修复了 RL 场景中的权重转置问题。[#5567](https://github.com/vllm-project/vllm-ascend/pull/5567)" + +#: ../../source/user_guide/release_notes.md:338 +msgid "" +"**Eagle3 SP**: Adapted SP to eagle3. [#5562](https://github.com/vllm-" +"project/vllm-ascend/pull/5562)" +msgstr "" +"**Eagle3 SP**:使 SP 适配 eagle3。[#5562](https://github.com/vllm-project/vllm-ascend/pull/5562)" + +#: ../../source/user_guide/release_notes.md:339 +msgid "" +"**GLM4.6 MTP**: GLM4.6 now supports MTP with fullgraph. " +"[#5460](https://github.com/vllm-project/vllm-ascend/pull/5460)" +msgstr "" +"**GLM4.6 MTP**:GLM4.6 现在支持使用全图的 MTP。[#5460](https://github.com/vllm-project/vllm-ascend/pull/5460)" + +#: ../../source/user_guide/release_notes.md:340 +msgid "" +"**Flashcomm2 Oshard**: Flashcomm2 now works with oshard generalized " +"feature. [#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" +msgstr "" +"**Flashcomm2 Oshard**:Flashcomm2 现在可与 oshard 通用化功能协同工作。[#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" + +#: ../../source/user_guide/release_notes.md:341 +msgid "" +"**Fine-grained Shared Expert Overlap**: Support fine-grained shared " +"expert overlap. [#5962](https://github.com/vllm-project/vllm-" +"ascend/pull/5962)" +msgstr "" +"**细粒度共享专家重叠**:支持细粒度的共享专家重叠。[#5962](https://github.com/vllm-project/vllm-ascend/pull/5962)" + +#: ../../source/user_guide/release_notes.md:345 +msgid "" +"Due to the upgrade of `transformers` package, some models quantization " +"weight, such as `qwen2.5vl`, `gemma3`, `minimax`, may not work. We'll fix" +" it in the next post release. [#6302](https://github.com/vllm-project" +"/vllm-ascend/pull/6302)" +msgstr "" +"由于 `transformers` 包的升级,某些模型的量化权重(如 `qwen2.5vl`、`gemma3`、`minimax`)可能无法工作。我们将在下一个后续版本中修复此问题。[#6302](https://github.com/vllm-project/vllm-ascend/pull/6302)" + +#: ../../source/user_guide/release_notes.md:346 +msgid "" +"The performance of `Qwen3-32B` will not be good with 128K input case, " +"it's suggested to enable pcp&dcp feature for this case. This will be " +"improved in the next CANN release." +msgstr "" +"`Qwen3-32B` 在 128K 输入长度场景下的性能可能不佳,建议为此场景启用 pcp&dcp 功能。这将在下一个 CANN 版本中得到改进。" + +#: ../../source/user_guide/release_notes.md:347 +msgid "" +"The performance of `Qwen3-235B`, `Qwen3-480B` under prefill-decode " +"scenario and EP=32 scenario is not good as expect. We'll improve it in " +"the next post release." +msgstr "" +"`Qwen3-235B`、`Qwen3-480B` 在预填充-解码场景和 EP=32 场景下的性能未达预期。我们将在下一个后续版本中改进。" + +#: ../../source/user_guide/release_notes.md:348 +msgid "" +"When deploy deepseek3.1 under prefill-decode scenario, please make sure " +"the tp size for decode node is great than 1. `TP=1` doesn't work. This " +"will be fixed in the next CANN release." 
+msgstr "" +"在预填充-解码场景下部署 deepseek3.1 时,请确保解码节点的 tp 大小大于 1。`TP=1` 无法工作。这将在下一个 CANN 版本中修复。" + +#: ../../source/user_guide/release_notes.md:350 +msgid "v0.14.0rc1 - 2026.01.26" +msgstr "v0.14.0rc1 - 2026.01.26" + +#: ../../source/user_guide/release_notes.md:352 +msgid "" +"This is the first release candidate of v0.14.0 for vLLM Ascend. Please " +"follow the [official doc](https://docs.vllm.ai/projects/ascend/en/latest)" +" to get started. This release includes all the changes in v0.13.0rc2. So " +"We just list the differences from v0.13.0rc2. If you are upgrading from " +"v0.13.0rc1, please read both v0.14.0rc1 and v0.13.0rc2 release notes." +msgstr "" +"这是 vLLM Ascend v0.14.0 的第一个候选版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。此版本包含了 v0.13.0rc2 中的所有更改。因此我们仅列出与 v0.13.0rc2 的差异。如果您是从 v0.13.0rc1 升级,请同时阅读 v0.14.0rc1 和 v0.13.0rc2 的发布说明。" + +#: ../../source/user_guide/release_notes.md:356 +msgid "" +"310P support is back now. In this release, only basic dense and vl models" +" are supported with eager mode. We'll keep improving and maintaining the " +"support for 310P. [#5776](https://github.com/vllm-project/vllm-" +"ascend/pull/5776)" +msgstr "" +"现已恢复对 310P 的支持。在此版本中,仅支持基础密集模型和 VL 模型,且使用 Eager 模式。我们将持续改进和维护对 310P 的支持。[#5776](https://github.com/vllm-project/vllm-ascend/pull/5776)" + +#: ../../source/user_guide/release_notes.md:357 +msgid "" +"Support compressed tensors moe w8a8-int8 quantization. " +"[#5718](https://github.com/vllm-project/vllm-ascend/pull/5718)" +msgstr "" +"支持压缩张量的 MoE w8a8-int8 量化。 " +"[#5718](https://github.com/vllm-project/vllm-ascend/pull/5718)" + +#: ../../source/user_guide/release_notes.md:358 +msgid "" +"Support Medusa speculative decoding. [#5668](https://github.com/vllm-" +"project/vllm-ascend/pull/5668)" +msgstr "" +"支持 Medusa 推测式解码。 [#5668](https://github.com/vllm-project/vllm-" +"ascend/pull/5668)" + +#: ../../source/user_guide/release_notes.md:359 +msgid "" +"Support Eagle3 speculative decoding for Qwen3vl. " +"[#4848](https://github.com/vllm-project/vllm-ascend/pull/4848)" +msgstr "" +"支持 Qwen3vl 的 Eagle3 推测式解码。 " +"[#4848](https://github.com/vllm-project/vllm-ascend/pull/4848)" + +#: ../../source/user_guide/release_notes.md:363 +msgid "" +"Xlite Backend supports Qwen3 MoE now. [#5951](https://github.com/vllm-" +"project/vllm-ascend/pull/5951)" +msgstr "" +"Xlite 后端现已支持 Qwen3 MoE。 [#5951](https://github.com/vllm-project/vllm-" +"ascend/pull/5951)" + +#: ../../source/user_guide/release_notes.md:364 +msgid "" +"Support DSA-CP for PD-mix deployment case. [#5702](https://github.com" +"/vllm-project/vllm-ascend/pull/5702)" +msgstr "" +"支持 PD-mix 部署场景的 DSA-CP。 [#5702](https://github.com/vllm-project/vllm-" +"ascend/pull/5702)" + +#: ../../source/user_guide/release_notes.md:365 +msgid "" +"Add support of new W4A4_LAOS_DYNAMIC quantization method. " +"[#5143](https://github.com/vllm-project/vllm-ascend/pull/5143)" +msgstr "" +"新增对 W4A4_LAOS_DYNAMIC 量化方法的支持。 " +"[#5143](https://github.com/vllm-project/vllm-ascend/pull/5143)" + +#: ../../source/user_guide/release_notes.md:369 +msgid "" +"The performance of Qwen3-next has been improved. 
" +"[#5664](https://github.com/vllm-project/vllm-ascend/pull/5664) " +"[#5984](https://github.com/vllm-project/vllm-ascend/pull/5984) " +"[#5765](https://github.com/vllm-project/vllm-ascend/pull/5765)" +msgstr "" +"Qwen3-next 的性能已得到提升。 " +"[#5664](https://github.com/vllm-project/vllm-ascend/pull/5664) " +"[#5984](https://github.com/vllm-project/vllm-ascend/pull/5984) " +"[#5765](https://github.com/vllm-project/vllm-ascend/pull/5765)" + +#: ../../source/user_guide/release_notes.md:370 +msgid "" +"The CPU bind logic and performance has been improved. " +"[#5555](https://github.com/vllm-project/vllm-ascend/pull/5555)" +msgstr "" +"CPU 绑定逻辑和性能已得到改进。 [#5555](https://github.com/vllm-project/vllm-" +"ascend/pull/5555)" + +#: ../../source/user_guide/release_notes.md:371 +msgid "" +"Merge Q/K split to simplify AscendApplyRotaryEmb for better performance. " +"[#5799](https://github.com/vllm-project/vllm-ascend/pull/5799)" +msgstr "" +"合并 Q/K 拆分以简化 AscendApplyRotaryEmb,从而提升性能。 " +"[#5799](https://github.com/vllm-project/vllm-ascend/pull/5799)" + +#: ../../source/user_guide/release_notes.md:372 +msgid "" +"Add Matmul Allreduce Rmsnorm fusion Pass. It's disabled by default. Set " +"`fuse_allreduce_rms=True` in `--additional_config` to enable it. " +"[#5034](https://github.com/vllm-project/vllm-ascend/pull/5034)" +msgstr "" +"新增 Matmul Allreduce Rmsnorm 融合 Pass。默认禁用。在 `--additional_config` " +"中设置 `fuse_allreduce_rms=True` 以启用它。 " +"[#5034](https://github.com/vllm-project/vllm-ascend/pull/5034)" + +#: ../../source/user_guide/release_notes.md:373 +msgid "" +"Optimize rope embedding with triton kernel for huge performance gain. " +"[#5918](https://github.com/vllm-project/vllm-ascend/pull/5918)" +msgstr "" +"使用 triton kernel 优化 rope embedding,以获得巨大的性能提升。 " +"[#5918](https://github.com/vllm-project/vllm-ascend/pull/5918)" + +#: ../../source/user_guide/release_notes.md:374 +msgid "" +"support advanced apply_top_k_top_p without top_k constraint. " +"[#6098](https://github.com/vllm-project/vllm-ascend/pull/6098)" +msgstr "" +"支持无 top_k 约束的高级 apply_top_k_top_p。 " +"[#6098](https://github.com/vllm-project/vllm-ascend/pull/6098)" + +#: ../../source/user_guide/release_notes.md:375 +msgid "" +"Parallelize Q/K/V padding in AscendMMEncoderAttention for better " +"performance. [#6204](https://github.com/vllm-project/vllm-" +"ascend/pull/6204)" +msgstr "" +"在 AscendMMEncoderAttention 中并行化 Q/K/V 填充以获得更好的性能。 " +"[#6204](https://github.com/vllm-project/vllm-ascend/pull/6204)" + +#: ../../source/user_guide/release_notes.md:379 +msgid "" +"model runner v2 support triton of penalty. [#5854](https://github.com" +"/vllm-project/vllm-ascend/pull/5854)" +msgstr "" +"model runner v2 支持 triton 惩罚。 [#5854](https://github.com/vllm-project/vllm-" +"ascend/pull/5854)" + +#: ../../source/user_guide/release_notes.md:380 +msgid "" +"model runner v2 support eagle spec decoding. [#5840](https://github.com" +"/vllm-project/vllm-ascend/pull/5840)" +msgstr "" +"model runner v2 支持 eagle 推测式解码。 [#5840](https://github.com/vllm-" +"project/vllm-ascend/pull/5840)" + +#: ../../source/user_guide/release_notes.md:381 +msgid "" +"Fix multimodal inference OOM issues by setting `expandable_segments:True`" +" by default. [#5855](https://github.com/vllm-project/vllm-" +"ascend/pull/5855)" +msgstr "" +"通过默认设置 `expandable_segments:True` 修复多模态推理 OOM 问题。 " +"[#5855](https://github.com/vllm-project/vllm-ascend/pull/5855)" + +#: ../../source/user_guide/release_notes.md:382 +msgid "" +"`VLLM_ASCEND_ENABLE_MLAPO` is set to `True` by default. 
It's enabled " +"automatically on decode node in PD deployment case. Please note that this" +" feature will cost more memory. If you are memory sensitive, please set " +"it to False. [#5952](https://github.com/vllm-project/vllm-" +"ascend/pull/5952)" +msgstr "" +"`VLLM_ASCEND_ENABLE_MLAPO` 默认设置为 `True`。在 PD 部署场景的解码节点上会自动启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。 " +"[#5952](https://github.com/vllm-project/vllm-ascend/pull/5952)" + +#: ../../source/user_guide/release_notes.md:383 +msgid "" +"SSL config can be set to kv_extra_config for PD deployment with mooncake " +"layerwise connector. [#5875](https://github.com/vllm-project/vllm-" +"ascend/pull/5875)" +msgstr "" +"对于使用 mooncake 分层连接器的 PD 部署,SSL 配置可以设置到 kv_extra_config 中。 " +"[#5875](https://github.com/vllm-project/vllm-ascend/pull/5875)" + +#: ../../source/user_guide/release_notes.md:384 +msgid "" +"support `--max_model_len=auto`. [#6193](https://github.com/vllm-project" +"/vllm-ascend/pull/6193)" +msgstr "" +"支持 `--max_model_len=auto`。 [#6193](https://github.com/vllm-project/vllm-" +"ascend/pull/6193)" + +#: ../../source/user_guide/release_notes.md:388 +msgid "" +"torch-npu is upgraded to 2.9.0 [#6112](https://github.com/vllm-project" +"/vllm-ascend/pull/6112)" +msgstr "" +"torch-npu 已升级至 2.9.0 [#6112](https://github.com/vllm-project/vllm-" +"ascend/pull/6112)" + +#: ../../source/user_guide/release_notes.md:392 +msgid "" +"EPLB config options is moved to `eplb_config` in [additional " +"config](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html)." +" The old ones are removed in this release." +msgstr "" +"EPLB 配置选项已移至 [additional " +"config](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html) 中的 `eplb_config`。旧选项在此版本中已被移除。" + +#: ../../source/user_guide/release_notes.md:393 +msgid "" +"The profiler envs, such as `VLLM_TORCH_PROFILER_DIR` and " +"`VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY` do not work with vLLM Ascend " +"now. Please use vLLM `--profiler-config` parameters instead. " +"[#5928](https://github.com/vllm-project/vllm-ascend/pull/5928)" +msgstr "" +"分析器环境变量,如 `VLLM_TORCH_PROFILER_DIR` 和 " +"`VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY`,现已不适用于 vLLM Ascend。请改用 vLLM 的 `--profiler-config` 参数。 " +"[#5928](https://github.com/vllm-project/vllm-ascend/pull/5928)" + +#: ../../source/user_guide/release_notes.md:395 +msgid "Known Issues" +msgstr "已知问题" + +#: ../../source/user_guide/release_notes.md:397 +msgid "" +"If you hit the pickle error from `EngineCore` process sometimes, please " +"cherry-pick the [PR](https://github.com/vllm-project/vllm/pull/32022) " +"into your local vLLM code. This known issue will be fixed in vLLM in the " +"next release." +msgstr "" +"如果您有时遇到来自 `EngineCore` 进程的 pickle 错误,请将 [PR](https://github.com/vllm-project/vllm/pull/32022) cherry-pick 到您的本地 vLLM 代码中。此已知问题将在 vLLM 的下一个版本中修复。" + +#: ../../source/user_guide/release_notes.md:399 +msgid "v0.13.0rc2 - 2026.01.24" +msgstr "v0.13.0rc2 - 2026.01.24" + +#: ../../source/user_guide/release_notes.md:401 +msgid "" +"This is the second release candidate of v0.13.0 for vLLM Ascend. In this " +"rc release, we fixed lots of bugs and improved the performance of many " +"models. Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get started. " +"Any feedback is welcome to help us to improve the final version of " +"v0.13.0." 
+msgstr "" +"这是 vLLM Ascend v0.13.0 的第二个候选版本。在此 rc 版本中,我们修复了大量错误并提升了多个模型的性能。请按照 [官方文档](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) 开始使用。欢迎任何反馈以帮助我们改进 v0.13.0 的最终版本。" + +#: ../../source/user_guide/release_notes.md:405 +msgid "" +"We mainly focus on quality and performance improvement in this release. " +"The spec decode, graph mode, context parallel and EPLB have been improved" +" significantly. A lot of bugs have been fixed and the performance has " +"been improved for DeepSeek3.1/3.2, Qwen3 Dense/MOE models." +msgstr "" +"在此版本中,我们主要关注质量和性能的改进。推测式解码、图模式、上下文并行和 EPLB 都得到了显著提升。修复了大量错误,并提升了 DeepSeek3.1/3.2、Qwen3 Dense/MOE 模型的性能。" + +#: ../../source/user_guide/release_notes.md:409 +msgid "" +"implement basic framework for batch invariant [#5517](https://github.com" +"/vllm-project/vllm-ascend/pull/5517)" +msgstr "" +"实现批处理不变性的基础框架 [#5517](https://github.com/vllm-project/vllm-" +"ascend/pull/5517)" + +#: ../../source/user_guide/release_notes.md:410 +msgid "" +"Eagle spec decode feature now works with full graph mode. " +"[#5118](https://github.com/vllm-project/vllm-ascend/pull/5118)" +msgstr "" +"Eagle 推测式解码功能现在可与全图模式协同工作。 " +"[#5118](https://github.com/vllm-project/vllm-ascend/pull/5118)" + +#: ../../source/user_guide/release_notes.md:411 +msgid "" +"Context Parallel(PCP&DCP) feature is more stable now. And it works for " +"most case. Please try it out." +msgstr "" +"上下文并行(PCP&DCP)功能现在更加稳定,适用于大多数情况。请尝试使用。" + +#: ../../source/user_guide/release_notes.md:412 +msgid "" +"MTP and eagle spec decode feature now works in most cases. And it's " +"suggested to use them in most cases." +msgstr "" +"MTP 和 eagle 推测式解码功能现在在大多数情况下都能工作。建议在大多数情况下使用它们。" + +#: ../../source/user_guide/release_notes.md:413 +msgid "" +"EPLB feature more stable now. Many bugs have been fixed. Mix placement " +"works now [#6086](https://github.com/vllm-project/vllm-ascend/pull/6086)" +msgstr "" +"EPLB 功能现在更加稳定。修复了许多错误。混合放置现已可用 [#6086](https://github.com/vllm-project/vllm-ascend/pull/6086)" + +#: ../../source/user_guide/release_notes.md:414 +msgid "" +"Support kv nz feature for DeepSeek decode node in disagg-prefill scenario" +" [#3072](https://github.com/vllm-project/vllm-ascend/pull/3072)" +msgstr "" +"支持解耦-预填充场景下 DeepSeek 解码节点的 kv nz 功能 [#3072](https://github.com/vllm-project/vllm-ascend/pull/3072)" + +#: ../../source/user_guide/release_notes.md:416 +msgid "Model Support" +msgstr "模型支持" + +#: ../../source/user_guide/release_notes.md:418 +msgid "" +"LongCat-Flash is supported now.[#3833](https://github.com/vllm-project" +"/vllm-ascend/pull/3833)" +msgstr "" +"现已支持 LongCat-Flash。[#3833](https://github.com/vllm-project/vllm-" +"ascend/pull/3833)" + +#: ../../source/user_guide/release_notes.md:419 +msgid "" +"minimax_m2 is supported now. [#5624](https://github.com/vllm-project" +"/vllm-ascend/pull/5624)" +msgstr "" +"现已支持 minimax_m2。 [#5624](https://github.com/vllm-project/vllm-" +"ascend/pull/5624)" + +#: ../../source/user_guide/release_notes.md:420 +msgid "" +"Support for cross-attention and whisper models [#5592](https://github.com" +"/vllm-project/vllm-ascend/pull/5592)" +msgstr "" +"支持交叉注意力和 whisper 模型 [#5592](https://github.com/vllm-project/vllm-" +"ascend/pull/5592)" + +#: ../../source/user_guide/release_notes.md:424 +msgid "" +"Many custom ops and triton kernels are added in this release to speed up " +"the performance of models. Such as `RejectSampler`, " +"`MoeInitRoutingCustom`, `DispatchFFNCombine` and so on." 
+msgstr "" +"此版本中添加了许多自定义算子和 triton kernel 以加速模型性能。例如 `RejectSampler`、`MoeInitRoutingCustom`、`DispatchFFNCombine` 等。" + +#: ../../source/user_guide/release_notes.md:425 +msgid "" +"Improved the performance of Layerwise Connector " +"[#5303](https://github.com/vllm-project/vllm-ascend/pull/5303)" +msgstr "" +"提升了 Layerwise Connector 的性能 [#5303](https://github.com/vllm-project/vllm-ascend/pull/5303)" + +#: ../../source/user_guide/release_notes.md:429 +msgid "" +"Basic support Model Runner v2. Model Runner V2 is the next generation of " +"vLLM. It will be used by default in the future release. " +"[#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" +msgstr "" +"基础支持 Model Runner v2。Model Runner V2 是 vLLM 的下一代版本,将在未来的版本中默认使用。" +"[#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" + +#: ../../source/user_guide/release_notes.md:430 +msgid "" +"Fixed a bug that the zmq send/receive may failed " +"[#5503](https://github.com/vllm-project/vllm-ascend/pull/5503)" +msgstr "" +"修复了 zmq 发送/接收可能失败的 bug [#5503](https://github.com/vllm-project/vllm-ascend/pull/5503)" + +#: ../../source/user_guide/release_notes.md:431 +msgid "" +"Supported to use full-graph with Qwen3-Next-MTP " +"[#5477](https://github.com/vllm-project/vllm-ascend/pull/5477)" +msgstr "" +"支持 Qwen3-Next-MTP 使用全图模式 [#5477](https://github.com/vllm-project/vllm-ascend/pull/5477)" + +#: ../../source/user_guide/release_notes.md:432 +msgid "" +"Fix weight transpose in RL scenarios [#5567](https://github.com/vllm-" +"project/vllm-ascend/pull/5567)" +msgstr "" +"修复强化学习场景中的权重转置问题 [#5567](https://github.com/vllm-project/vllm-ascend/pull/5567)" + +#: ../../source/user_guide/release_notes.md:433 +msgid "" +"Adapted SP to eagle3 [#5562](https://github.com/vllm-project/vllm-" +"ascend/pull/5562)" +msgstr "" +"适配 SP 以支持 eagle3 [#5562](https://github.com/vllm-project/vllm-ascend/pull/5562)" + +#: ../../source/user_guide/release_notes.md:434 +msgid "" +"Context Parallel(PCP&DCP) support mlapo [#5672](https://github.com/vllm-" +"project/vllm-ascend/pull/5672)" +msgstr "" +"上下文并行(PCP&DCP)支持 mlapo [#5672](https://github.com/vllm-project/vllm-ascend/pull/5672)" + +#: ../../source/user_guide/release_notes.md:435 +msgid "" +"GLM4.6 support mtp with fullgraph [#5460](https://github.com/vllm-project" +"/vllm-ascend/pull/5460)" +msgstr "" +"GLM4.6 支持使用全图模式的 mtp [#5460](https://github.com/vllm-project/vllm-ascend/pull/5460)" + +#: ../../source/user_guide/release_notes.md:436 +msgid "" +"Flashcomm2 now works with oshard generalized feature " +"[#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" +msgstr "" +"Flashcomm2 现在可与 oshard 通用化功能协同工作 [#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" + +#: ../../source/user_guide/release_notes.md:437 +msgid "" +"Support setting tp=1 for the Eagle draft model [#5804](https://github.com" +"/vllm-project/vllm-ascend/pull/5804)" +msgstr "" +"支持为 Eagle 草稿模型设置 tp=1 [#5804](https://github.com/vllm-project/vllm-ascend/pull/5804)" + +#: ../../source/user_guide/release_notes.md:438 +msgid "" +"Flashcomm1 feature now works with qwen3-vl [#5848](https://github.com" +"/vllm-project/vllm-ascend/pull/5848)" +msgstr "" +"Flashcomm1 功能现在可与 qwen3-vl 协同工作 [#5848](https://github.com/vllm-project/vllm-ascend/pull/5848)" + +#: ../../source/user_guide/release_notes.md:439 +msgid "" +"Support fine-grained shared expert overlap [#5962](https://github.com" +"/vllm-project/vllm-ascend/pull/5962)" +msgstr "" +"支持细粒度共享专家重叠 [#5962](https://github.com/vllm-project/vllm-ascend/pull/5962)" + +#: 
../../source/user_guide/release_notes.md:443 +msgid "CANN is upgraded to 8.5.0" +msgstr "CANN 已升级至 8.5.0" + +#: ../../source/user_guide/release_notes.md:444 +msgid "" +"torch-npu is upgraded to 2.8.0.post1. Please note that the post version " +"will not be installed by default. Please install it by hand from [pypi " +"mirror](https://mirrors.huaweicloud.com/ascend/repos/pypi/torch-npu/)." +msgstr "" +"torch-npu 已升级至 2.8.0.post1。请注意,此 post 版本默认不会安装,请从 [pypi 镜像](https://mirrors.huaweicloud.com/ascend/repos/pypi/torch-npu/)手动安装。" + +#: ../../source/user_guide/release_notes.md:445 +msgid "triton-ascend is upgraded to 3.2.0" +msgstr "triton-ascend 已升级至 3.2.0" + +#: ../../source/user_guide/release_notes.md:449 +msgid "" +"`CPUOffloadingConnector` is deprecated. We'll remove it in the next " +"release. It'll be replaced by CPUOffload feature from vLLM in the future." +msgstr "" +"`CPUOffloadingConnector` 已弃用。我们将在下一个版本中移除它。未来将由 vLLM 的 CPUOffload 功能替代。" + +#: ../../source/user_guide/release_notes.md:450 +msgid "" +"eplb config options is moved to `eplb_config` in [additional " +"config](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html)." +" The old ones will be removed in the next release." +msgstr "" +"eplb 配置选项已移至 [附加配置](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html) 中的 `eplb_config`。旧选项将在下一个版本中移除。" + +#: ../../source/user_guide/release_notes.md:451 +msgid "" +"`ProfileExecuteDuration` [feature](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.13.0rc2/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md)" +" is deprecated. It's replaced by `ObservabilityConfig` from vLLM." +msgstr "" +"`ProfileExecuteDuration` [功能](https://github.com/vllm-project/vllm-ascend/blob/v0.13.0rc2/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md) 已弃用。它已被 vLLM 的 `ObservabilityConfig` 取代。" + +#: ../../source/user_guide/release_notes.md:452 +msgid "" +"The value of `VLLM_ASCEND_ENABLE_MLAPO` env will be set to True by " +"default in the next release. It'll be enabled in decode node by default. " +"Please note that this feature will cost more memory. If you are memory " +"sensitive, please set it to False." +msgstr "" +"`VLLM_ASCEND_ENABLE_MLAPO` 环境变量的值将在下一个版本中默认设置为 True。它将在解码节点默认启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。" + +#: ../../source/user_guide/release_notes.md:454 +msgid "v0.13.0rc1 - 2025.12.27" +msgstr "v0.13.0rc1 - 2025年12月27日" + +#: ../../source/user_guide/release_notes.md:456 +msgid "" +"This is the first release candidate of v0.13.0 for vLLM Ascend. We landed" +" lots of bug fix, performance improvement and feature support in this " +"release. Any feedback is welcome to help us to improve vLLM Ascend. " +"Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/latest) to get started." 
+msgstr "" +"这是 vLLM Ascend v0.13.0 的第一个候选版本。在此版本中,我们修复了大量错误,提升了性能并增加了功能支持。欢迎任何反馈以帮助我们改进 vLLM Ascend。请遵循[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" + +#: ../../source/user_guide/release_notes.md:460 +msgid "" +"Improved the performance of DeepSeek V3.2, please refer to " +"[tutorials](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.13.0rc1/docs/source/tutorials/DeepSeek-V3.2.md)" +msgstr "" +"提升了 DeepSeek V3.2 的性能,请参考[教程](https://github.com/vllm-project/vllm-ascend/blob/v0.13.0rc1/docs/source/tutorials/DeepSeek-V3.2.md)" + +#: ../../source/user_guide/release_notes.md:461 +msgid "" +"Qwen3-Next MTP with chunked prefill is supported now " +"[#4770](https://github.com/vllm-project/vllm-ascend/pull/4770), please " +"refer to [tutorials](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.13.0rc1/docs/source/tutorials/Qwen3-Next.md)" +msgstr "" +"现已支持带分块预填充的 Qwen3-Next MTP [#4770](https://github.com/vllm-project/vllm-ascend/pull/4770),请参考[教程](https://github.com/vllm-project/vllm-ascend/blob/v0.13.0rc1/docs/source/tutorials/Qwen3-Next.md)" + +#: ../../source/user_guide/release_notes.md:462 +msgid "" +"[Experimental] Prefill Context Parallel and Decode Context Parallel are " +"supported, but notice that it is an experimental feature now, welcome any" +" feedback. please refer to [context parallel feature " +"guide](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/context_parallel.html)" +msgstr "" +"[实验性] 支持预填充上下文并行和解码上下文并行,但请注意,目前这是一个实验性功能,欢迎任何反馈。请参考[上下文并行功能指南](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/context_parallel.html)" + +#: ../../source/user_guide/release_notes.md:466 +msgid "" +"Support openPangu Ultra MoE [4615](https://github.com/vllm-project/vllm-" +"ascend/pull/4615)" +msgstr "" +"支持 openPangu Ultra MoE [4615](https://github.com/vllm-project/vllm-ascend/pull/4615)" + +#: ../../source/user_guide/release_notes.md:467 +msgid "" +"A new quantization method W8A16 is supported now. " +"[#4541](https://github.com/vllm-project/vllm-ascend/pull/4541)" +msgstr "" +"现已支持新的量化方法 W8A16 [#4541](https://github.com/vllm-project/vllm-ascend/pull/4541)" + +#: ../../source/user_guide/release_notes.md:468 +msgid "" +"Cross-machine Disaggregated Prefill is supported now. " +"[#5008](https://github.com/vllm-project/vllm-ascend/pull/5008)" +msgstr "" +"现已支持跨机解耦预填充 [#5008](https://github.com/vllm-project/vllm-ascend/pull/5008)" + +#: ../../source/user_guide/release_notes.md:469 +msgid "" +"Add UCMConnector for KV Cache Offloading. [#4411](https://github.com" +"/vllm-project/vllm-ascend/pull/4411)" +msgstr "" +"为 KV Cache 卸载添加 UCMConnector [#4411](https://github.com/vllm-project/vllm-ascend/pull/4411)" + +#: ../../source/user_guide/release_notes.md:470 +msgid "" +"Support async_scheduler and disable_padded_drafter_batch in eagle. " +"[#4893](https://github.com/vllm-project/vllm-ascend/pull/4893)" +msgstr "" +"在 eagle 中支持 async_scheduler 和 disable_padded_drafter_batch [#4893](https://github.com/vllm-project/vllm-ascend/pull/4893)" + +#: ../../source/user_guide/release_notes.md:471 +msgid "" +"Support pcp + mtp in full graph mode. 
[#4572](https://github.com/vllm-" +"project/vllm-ascend/pull/4572)" +msgstr "" +"在全图模式下支持 pcp + mtp [#4572](https://github.com/vllm-project/vllm-ascend/pull/4572)" + +#: ../../source/user_guide/release_notes.md:472 +msgid "" +"Enhance all-reduce skipping logic for MoE models in NPUModelRunner " +"[#5329](https://github.com/vllm-project/vllm-ascend/pull/5329)" +msgstr "" +"增强 NPUModelRunner 中 MoE 模型的 all-reduce 跳过逻辑 [#5329](https://github.com/vllm-project/vllm-ascend/pull/5329)" + +#: ../../source/user_guide/release_notes.md:476 +msgid "Some general performance improvement:" +msgstr "一些通用的性能改进:" + +#: ../../source/user_guide/release_notes.md:478 +msgid "" +"Add l2norm triton kernel [#4595](https://github.com/vllm-project/vllm-" +"ascend/pull/4595)" +msgstr "" +"添加 l2norm triton 内核 [#4595](https://github.com/vllm-project/vllm-ascend/pull/4595)" + +#: ../../source/user_guide/release_notes.md:479 +msgid "" +"Add new pattern for AddRmsnormQuant with SP, which could only take effect" +" in graph mode. [#5077](https://github.com/vllm-project/vllm-" +"ascend/pull/5077)" +msgstr "" +"为带 SP 的 AddRmsnormQuant 添加新模式,该模式仅在图模式下生效 [#5077](https://github.com/vllm-project/vllm-ascend/pull/5077)" + +#: ../../source/user_guide/release_notes.md:480 +msgid "" +"Add async exponential while model executing. [#4501](https://github.com" +"/vllm-project/vllm-ascend/pull/4501)" +msgstr "" +"在模型执行时添加异步指数 [#4501](https://github.com/vllm-project/vllm-ascend/pull/4501)" + +#: ../../source/user_guide/release_notes.md:481 +msgid "" +"Remove the transpose step after attention and switch to " +"transpose_batchmatmul [#5390](https://github.com/vllm-project/vllm-" +"ascend/pull/5390)" +msgstr "" +"移除注意力后的转置步骤,并切换到 transpose_batchmatmul [#5390](https://github.com/vllm-project/vllm-ascend/pull/5390)" + +#: ../../source/user_guide/release_notes.md:482 +msgid "" +"To optimize the performance in small batch size scenario, an attention " +"operator with flash decoding function is offered, please refer to item 22" +" in [FAQs](https://docs.vllm.ai/projects/ascend/en/latest/faqs.html) to " +"enable it." +msgstr "" +"为优化小批量场景下的性能,提供了一个带有 flash decoding 功能的注意力算子,请参考[常见问题解答](https://docs.vllm.ai/projects/ascend/en/latest/faqs.html)中的第22项来启用它。" + +#: ../../source/user_guide/release_notes.md:484 +#: ../../source/user_guide/release_notes.md:522 +#: ../../source/user_guide/release_notes.md:572 +#: ../../source/user_guide/release_notes.md:610 +#: ../../source/user_guide/release_notes.md:635 +#: ../../source/user_guide/release_notes.md:671 +msgid "Other" +msgstr "其它" + +#: ../../source/user_guide/release_notes.md:486 +msgid "" +"OOM error on VL models is fixed now. We're keeping observing it, if you " +"hit OOM problem again, please submit an issue. [#5136](https://github.com" +"/vllm-project/vllm-ascend/pull/5136)" +msgstr "" +"VL 模型上的 OOM 错误现已修复。我们将持续观察,如果您再次遇到 OOM 问题,请提交 issue [#5136](https://github.com/vllm-project/vllm-ascend/pull/5136)" + +#: ../../source/user_guide/release_notes.md:487 +msgid "" +"Fixed an accuracy bug of Qwen3-Next-MTP when batched inferring. " +"[#4932](https://github.com/vllm-project/vllm-ascend/pull/4932)" +msgstr "" +"修复了 Qwen3-Next-MTP 在批量推理时的精度错误 [#4932](https://github.com/vllm-project/vllm-ascend/pull/4932)" + +#: ../../source/user_guide/release_notes.md:488 +msgid "" +"Fix npu-cpu offloading interface change bug. 
[#5290](https://github.com" +"/vllm-project/vllm-ascend/pull/5290)" +msgstr "" +"修复 npu-cpu 卸载接口变更导致的错误 [#5290](https://github.com/vllm-project/vllm-ascend/pull/5290)" + +#: ../../source/user_guide/release_notes.md:489 +msgid "" +"Fix MHA model runtime error in aclgraph mode [#5397](https://github.com" +"/vllm-project/vllm-ascend/pull/5397)" +msgstr "" +"修复 MHA 模型在 aclgraph 模式下的运行时错误 [#5397](https://github.com/vllm-project/vllm-ascend/pull/5397)" + +#: ../../source/user_guide/release_notes.md:490 +msgid "" +"Fix unsuitable moe_comm_type under ep=1 scenario " +"[#5388](https://github.com/vllm-project/vllm-ascend/pull/5388)" +msgstr "" +"修复 ep=1 场景下不合适的 moe_comm_type 设置 [#5388](https://github.com/vllm-project/vllm-ascend/pull/5388)" + +#: ../../source/user_guide/release_notes.md:494 +msgid "" +"`VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE` is removed and " +"`VLLM_ASCEND_ENABLE_PREFETCH_MLP` is recommend to replace as they always " +"be enabled together. [#5272](https://github.com/vllm-project/vllm-" +"ascend/pull/5272)" +msgstr "" +"`VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE` 已被移除,建议使用 `VLLM_ASCEND_ENABLE_PREFETCH_MLP` 替代,因为它们通常一起启用 [#5272](https://github.com/vllm-project/vllm-ascend/pull/5272)" + +#: ../../source/user_guide/release_notes.md:495 +msgid "" +"`VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` is dropped now. " +"[#5270](https://github.com/vllm-project/vllm-ascend/pull/5270)" +msgstr "" +"`VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` 现已被移除。 " +"[#5270](https://github.com/vllm-project/vllm-ascend/pull/5270)" + +#: ../../source/user_guide/release_notes.md:496 +msgid "" +"`VLLM_ASCEND_ENABLE_NZ` is disabled for float weight case, since we " +"notice that the performance is not good in some float case. Feel free to " +"set it to 2 if you make sure it works for your case. " +"[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" +msgstr "" +"对于浮点权重的情况,`VLLM_ASCEND_ENABLE_NZ` 已被禁用,因为我们注意到在某些浮点场景下性能不佳。如果您确认它适用于您的场景,可以将其设置为2。 " +"[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" + +#: ../../source/user_guide/release_notes.md:497 +msgid "" +"`chunked_prefill_for_mla` in `additional_config` is dropped now. " +"[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" +msgstr "" +"`additional_config` 中的 `chunked_prefill_for_mla` 现已被移除。 " +"[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" + +#: ../../source/user_guide/release_notes.md:498 +msgid "" +"`dump_config` in `additional_config` is renamed to `dump_config_path` and" +" the type is change from `dict` to `string`. [#5296](https://github.com" +"/vllm-project/vllm-ascend/pull/5296)" +msgstr "" +"`additional_config` 中的 `dump_config` 已重命名为 `dump_config_path`,其类型也从 `dict` 更改为 `string`。 " +"[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" + +#: ../../source/user_guide/release_notes.md:502 +msgid "" +"vLLM version has been upgraded to 0.13.0 and drop 0.12.0 support. 
" +"[#5146](https://github.com/vllm-project/vllm-ascend/pull/5146)" +msgstr "" +"vLLM 版本已升级至 0.13.0,并停止支持 0.12.0。 " +"[#5146](https://github.com/vllm-project/vllm-ascend/pull/5146)" + +#: ../../source/user_guide/release_notes.md:503 +msgid "" +"Transformer version has been upgraded >= 4.57.3 " +"[#5250](https://github.com/vllm-project/vllm-ascend/pull/5250)" +msgstr "" +"Transformer 版本已升级至 >= 4.57.3 " +"[#5250](https://github.com/vllm-project/vllm-ascend/pull/5250)" + +#: ../../source/user_guide/release_notes.md:507 +msgid "" +"Qwen3-Next doesn't support long sequence scenario, and we should limit " +"`gpu-memory-utilization` according to the doc to run Qwen3-Next. We'll " +"improve it in the next release" +msgstr "" +"Qwen3-Next 目前不支持长序列场景,运行 Qwen3-Next 时需根据文档限制 `gpu-memory-utilization`。我们将在下一个版本中改进此问题。" + +#: ../../source/user_guide/release_notes.md:508 +msgid "" +"The functional break on Qwen3-Next when the input/output is around " +"3.5k/1.5k is fixed, but it introduces a regression on performance. We'll " +"fix it in next release. [#5357](https://github.com/vllm-project/vllm-" +"ascend/issues/5357)" +msgstr "" +"Qwen3-Next 在输入/输出约为 3.5k/1.5k 时出现的功能中断问题已修复,但这导致了性能回退。我们将在下一个版本中修复此问题。 " +"[#5357](https://github.com/vllm-project/vllm-ascend/issues/5357)" + +#: ../../source/user_guide/release_notes.md:509 +msgid "" +"There is a precision issue with curl on ultra-short sequences in " +"DeepSeek-V3.2. We'll fix it in next release. [#5370](https://github.com" +"/vllm-project/vllm-ascend/issues/5370)" +msgstr "" +"DeepSeek-V3.2 在超短序列上存在 curl 精度问题。我们将在下一个版本中修复此问题。 " +"[#5370](https://github.com/vllm-project/vllm-ascend/issues/5370)" + +#: ../../source/user_guide/release_notes.md:511 +msgid "v0.11.0 - 2025.12.16" +msgstr "v0.11.0 - 2025年12月16日" + +#: ../../source/user_guide/release_notes.md:513 +msgid "" +"We're excited to announce the release of v0.11.0 for vLLM Ascend. This is" +" the official release for v0.11.0. Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.11.0) to get started. " +"We'll consider to release post version in the future if needed. This " +"release note will only contain the important change and note from " +"v0.11.0rc3." +msgstr "" +"我们很高兴地宣布 vLLM Ascend v0.11.0 版本发布。这是 v0.11.0 的正式版本。请按照 [官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0) 开始使用。如有需要,我们未来会考虑发布后续版本。本版本说明仅包含自 v0.11.0rc3 以来的重要变更和注意事项。" + +#: ../../source/user_guide/release_notes.md:517 +msgid "" +"Improved the performance for deepseek 3/3.1. [#3995](https://github.com" +"/vllm-project/vllm-ascend/pull/3995)" +msgstr "" +"提升了 deepseek 3/3.1 的性能。 [#3995](https://github.com/vllm-project/vllm-ascend/pull/3995)" + +#: ../../source/user_guide/release_notes.md:518 +msgid "" +"Fixed the accuracy bug for qwen3-vl. [#4811](https://github.com/vllm-" +"project/vllm-ascend/pull/4811)" +msgstr "" +"修复了 qwen3-vl 的精度问题。 [#4811](https://github.com/vllm-project/vllm-ascend/pull/4811)" + +#: ../../source/user_guide/release_notes.md:519 +msgid "" +"Improved the performance of sample. [#4153](https://github.com/vllm-" +"project/vllm-ascend/pull/4153)" +msgstr "" +"提升了采样性能。 [#4153](https://github.com/vllm-project/vllm-ascend/pull/4153)" + +#: ../../source/user_guide/release_notes.md:520 +msgid "" +"Eagle3 is back now. [#4721](https://github.com/vllm-project/vllm-" +"ascend/pull/4721)" +msgstr "" +"Eagle3 现已恢复支持。 [#4721](https://github.com/vllm-project/vllm-ascend/pull/4721)" + +#: ../../source/user_guide/release_notes.md:524 +msgid "" +"Improved the performance for kimi-k2. 
[#4555](https://github.com/vllm-" +"project/vllm-ascend/pull/4555)" +msgstr "" +"提升了 kimi-k2 的性能。 [#4555](https://github.com/vllm-project/vllm-ascend/pull/4555)" + +#: ../../source/user_guide/release_notes.md:525 +msgid "" +"Fixed a quantization bug for deepseek3.2-exp. [#4797](https://github.com" +"/vllm-project/vllm-ascend/pull/4797)" +msgstr "" +"修复了 deepseek3.2-exp 的一个量化问题。 [#4797](https://github.com/vllm-project/vllm-ascend/pull/4797)" + +#: ../../source/user_guide/release_notes.md:526 +msgid "" +"Fixed qwen3-vl-moe bug under high concurrency. [#4658](https://github.com" +"/vllm-project/vllm-ascend/pull/4658)" +msgstr "" +"修复了 qwen3-vl-moe 在高并发下的问题。 [#4658](https://github.com/vllm-project/vllm-ascend/pull/4658)" + +#: ../../source/user_guide/release_notes.md:527 +msgid "" +"Fixed an accuracy bug for Prefill Decode disaggregation case. " +"[#4437](https://github.com/vllm-project/vllm-ascend/pull/4437)" +msgstr "" +"修复了 Prefill Decode 解耦场景下的一个精度问题。 " +"[#4437](https://github.com/vllm-project/vllm-ascend/pull/4437)" + +#: ../../source/user_guide/release_notes.md:528 +msgid "" +"Fixed some bugs for EPLB [#4576](https://github.com/vllm-project/vllm-" +"ascend/pull/4576) [#4777](https://github.com/vllm-project/vllm-" +"ascend/pull/4777)" +msgstr "" +"修复了 EPLB 的一些问题。 [#4576](https://github.com/vllm-project/vllm-ascend/pull/4576) [#4777](https://github.com/vllm-project/vllm-ascend/pull/4777)" + +#: ../../source/user_guide/release_notes.md:529 +msgid "" +"Fixed the version incompatibility issue for openEuler docker image. " +"[#4745](https://github.com/vllm-project/vllm-ascend/pull/4745)" +msgstr "" +"修复了 openEuler docker 镜像的版本兼容性问题。 " +"[#4745](https://github.com/vllm-project/vllm-ascend/pull/4745)" + +#: ../../source/user_guide/release_notes.md:531 +msgid "Deprecation announcement" +msgstr "弃用声明" + +#: ../../source/user_guide/release_notes.md:533 +msgid "LLMdatadist connector has been deprecated, it'll be removed in v0.12.0rc1" +msgstr "LLMdatadist connector 已被弃用,将在 v0.12.0rc1 中移除。" + +#: ../../source/user_guide/release_notes.md:534 +msgid "Torchair graph has been deprecated, it'll be removed in v0.12.0rc1" +msgstr "Torchair graph 已被弃用,将在 v0.12.0rc1 中移除。" + +#: ../../source/user_guide/release_notes.md:535 +msgid "Ascend scheduler has been deprecated, it'll be removed in v0.12.0rc1" +msgstr "Ascend scheduler 已被弃用,将在 v0.12.0rc1 中移除。" + +#: ../../source/user_guide/release_notes.md:537 +msgid "Upgrade notice" +msgstr "升级须知" + +#: ../../source/user_guide/release_notes.md:539 +#: ../../source/user_guide/release_notes.md:606 +msgid "" +"torch-npu is upgraded to 2.7.1.post1. Please note that the package is " +"pushed to [pypi mirror](https://mirrors.huaweicloud.com/ascend/repos/pypi" +"/torch-npu/). So it's hard to add it to auto dependence. Please install " +"it by yourself." +msgstr "" +"torch-npu 已升级至 2.7.1.post1。请注意,该软件包已推送至 [pypi 镜像](https://mirrors.huaweicloud.com/ascend/repos/pypi/torch-npu/)。因此很难将其添加到自动依赖中。请自行安装。" + +#: ../../source/user_guide/release_notes.md:540 +msgid "CANN is upgraded to 8.3.rc2." +msgstr "CANN 已升级至 8.3.rc2。" + +#: ../../source/user_guide/release_notes.md:544 +msgid "" +"Qwen3-Next doesn't support expert parallel and MTP features in this " +"release. And it'll be oom if the input is too long. We'll improve it in " +"the next release" +msgstr "" +"Qwen3-Next 在此版本中不支持专家并行和 MTP 功能。如果输入过长,将会出现内存不足(OOM)。我们将在下一个版本中改进此问题。" + +#: ../../source/user_guide/release_notes.md:545 +msgid "" +"Deepseek 3.2 only work with torchair graph mode in this release. 
We'll " +"make it work with aclgraph mode in the next release." +msgstr "" +"Deepseek 3.2 在此版本中仅支持 torchair graph 模式。我们将在下一个版本中使其支持 aclgraph 模式。" + +#: ../../source/user_guide/release_notes.md:546 +msgid "" +"Qwen2-audio doesn't work by default. Temporary solution is to set `--gpu-" +"memory-utilization` to a suitable value, such as 0.8." +msgstr "" +"Qwen2-audio 默认无法工作。临时解决方案是将 `--gpu-memory-utilization` 设置为合适的值,例如 0.8。" + +#: ../../source/user_guide/release_notes.md:547 +msgid "" +"CPU bind feature doesn't work if more than one vLLM instance is running " +"on the same node." +msgstr "" +"如果在同一节点上运行多个 vLLM 实例,CPU 绑定功能将无法工作。" + +#: ../../source/user_guide/release_notes.md:549 +msgid "v0.12.0rc1 - 2025.12.13" +msgstr "v0.12.0rc1 - 2025年12月13日" + +#: ../../source/user_guide/release_notes.md:551 +msgid "" +"This is the first release candidate of v0.12.0 for vLLM Ascend. We landed" +" lots of bug fix, performance improvement and feature support in this " +"release. Any feedback is welcome to help us to improve vLLM Ascend. " +"Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/latest) to get started." +msgstr "" +"这是 vLLM Ascend v0.12.0 的第一个候选版本。我们在本次发布中修复了大量问题,提升了性能并增加了功能支持。欢迎任何反馈以帮助我们改进 vLLM Ascend。请按照 [官方文档](https://docs.vllm.ai/projects/ascend/en/latest) 开始使用。" + +#: ../../source/user_guide/release_notes.md:555 +msgid "" +"DeepSeek 3.2 is stable and performance is improved. In this release, you " +"don't need to install any other packages now. Following the [official " +"tutorial](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.12.0rc1/docs/source/tutorials/DeepSeek-V3.2.md) to start " +"using it." +msgstr "" +"DeepSeek 3.2 已稳定且性能得到提升。在此版本中,您无需安装任何其他软件包。请按照 [官方教程](https://github.com/vllm-project/vllm-ascend/blob/v0.12.0rc1/docs/source/tutorials/DeepSeek-V3.2.md) 开始使用。" + +#: ../../source/user_guide/release_notes.md:556 +msgid "" +"Async scheduler is more stable and ready to enable now. Please set " +"`--async-scheduling` to enable it." +msgstr "" +"异步调度器现已更加稳定并可以启用。请设置 `--async-scheduling` 来启用它。" + +#: ../../source/user_guide/release_notes.md:557 +msgid "" +"More new models, such as Qwen3-omni, DeepSeek OCR, PaddleOCR, OpenCUA are" +" supported now." +msgstr "" +"现已支持更多新模型,例如 Qwen3-omni、DeepSeek OCR、PaddleOCR、OpenCUA。" + +#: ../../source/user_guide/release_notes.md:559 +#: ../../source/user_guide/release_notes.md:629 +#: ../../source/user_guide/release_notes.md:663 +#: ../../source/user_guide/release_notes.md:697 +#: ../../source/user_guide/release_notes.md:721 +#: ../../source/user_guide/release_notes.md:765 +#: ../../source/user_guide/release_notes.md:834 +#: ../../source/user_guide/release_notes.md:859 +#: ../../source/user_guide/release_notes.md:894 +#: ../../source/user_guide/release_notes.md:998 +#: ../../source/user_guide/release_notes.md:1066 +#: ../../source/user_guide/release_notes.md:1118 +#: ../../source/user_guide/release_notes.md:1177 +#: ../../source/user_guide/release_notes.md:1241 +#: ../../source/user_guide/release_notes.md:1267 +#: ../../source/user_guide/release_notes.md:1293 +#: ../../source/user_guide/release_notes.md:1316 +#: ../../source/user_guide/release_notes.md:1342 +#: ../../source/user_guide/release_notes.md:1368 +#: ../../source/user_guide/release_notes.md:1405 msgid "Core" msgstr "核心" -#: ../../user_guide/release_notes.md:13 +#: ../../source/user_guide/release_notes.md:561 msgid "" -"Ascend PyTorch adapter (torch_npu) has been upgraded to `2.5.1.post1." -"dev20250619`. Don’t forget to update it in your environment. 
[#1347]" -"(https://github.com/vllm-project/vllm-ascend/pull/1347)" +"[Experimental] Full decode only graph mode is supported now. Although it " +"is not enabled by default, we suggest to enable it by `--compilation-" +"config '{\"cudagraph_mode\":\"FULL_DECODE_ONLY\"}'` in most case. Let us " +"know if you hit any error. We'll keep improve it and enable it by default" +" in next few release." msgstr "" -"Ascend PyTorch 适配器(torch_npu)已升级到 `2.5.1.post1.dev20250619`。请不要" -"忘记在您的环境中进行更新。 [#1347](https://github.com/vllm-project/vllm-" -"ascend/pull/1347)" +"[实验性] 现已支持全解码专用图模式。虽然默认未启用,但我们建议在大多数情况下通过 `--compilation-config '{\"cudagraph_mode\":\"FULL_DECODE_ONLY\"}'` 启用它。如果您遇到任何错误,请告知我们。我们将持续改进,并在未来几个版本中默认启用。" -#: ../../user_guide/release_notes.md:14 +#: ../../source/user_guide/release_notes.md:562 msgid "" -"The **GatherV3** error has been fixed with **aclgraph** mode. [#1416]" -"(https://github.com/vllm-project/vllm-ascend/pull/1416)" +"Lots of triton kernel are added. The performance of vLLM Ascend, " +"especially Qwen3-Next and DeepSeek 3.2 is improved. Please note that " +"triton is not installed and enabled by default, but we suggest to enable " +"it in most case. You can download and install it by hand from [package " +"url](https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-" +"ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl)." +" If you're running vLLM Ascend with X86, you need to build triton ascend " +"by yourself from [source](https://gitcode.com/Ascend/triton-ascend)" msgstr "" -"**GatherV3** 错误已通过 **aclgraph** 模式修复。[#1416](https://github.com/" -"vllm-project/vllm-ascend/pull/1416)" +"新增了大量 triton 内核。vLLM Ascend 的性能,特别是 Qwen3-Next 和 DeepSeek 3.2 的性能得到了提升。请注意,triton 默认未安装和启用,但我们建议在大多数情况下启用它。您可以从 [软件包链接](https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl) 手动下载并安装。如果您在 X86 架构上运行 vLLM Ascend,则需要从 [源代码](https://gitcode.com/Ascend/triton-ascend) 自行构建 triton ascend。" -#: ../../user_guide/release_notes.md:15 +#: ../../source/user_guide/release_notes.md:563 msgid "" -"W8A8 quantization works on Atlas 300I series now. [#1560](https://github." -"com/vllm-project/vllm-ascend/pull/1560)" +"Lots of Ascend ops are added to improve the performance. It means that " +"from this release vLLM Ascend only works with custom ops built. So we " +"removed the env `COMPILE_CUSTOM_KERNELS`. You can not set it to 0 now." msgstr "" -"W8A8 量化现在可以在 Atlas 300I 系列上运行了。[#1560](https://github.com/" -"vllm-project/vllm-ascend/pull/1560)" +"新增了大量 Ascend 算子以提升性能。这意味着从本版本开始,vLLM Ascend 仅在使用自定义算子构建时才能工作。因此我们移除了环境变量 `COMPILE_CUSTOM_KERNELS`。您现在无法再将其设置为 0。" -#: ../../user_guide/release_notes.md:16 +#: ../../source/user_guide/release_notes.md:564 +msgid "" +"speculative decode method `MTP` is more stable now. It can be enabled " +"with most case and decode token number can be 1,2,3." +msgstr "" +"推测解码方法 `MTP` 现已更加稳定。它可以在大多数情况下启用,解码令牌数可以是 1、2、3。" + +#: ../../source/user_guide/release_notes.md:565 +msgid "" +"speculative decode method `suffix` is supported now. Thanks for the " +"contribution from China Merchants Bank." +msgstr "" +"现已支持推测解码方法 `suffix`。感谢招商银行的贡献。" + +#: ../../source/user_guide/release_notes.md:566 +msgid "" +"llm-comppressor quantization tool with W8A8 works now. You can now deploy" +" the model with W8A8 quantization from this tool directly." 
+msgstr "" +"支持 W8A8 的 llm-comppressor 量化工具现已可用。您现在可以直接使用此工具部署经过 W8A8 量化的模型。" + +#: ../../source/user_guide/release_notes.md:567 +msgid "W4A4 quantization works now." +msgstr "W4A4 量化现已可用。" + +#: ../../source/user_guide/release_notes.md:568 +msgid "" +"Support features flashcomm1 and flashcomm2 in paper " +"[flashcomm](https://arxiv.org/pdf/2412.04964) [#3004](https://github.com" +"/vllm-project/vllm-ascend/pull/3004) [#3334](https://github.com/vllm-" +"project/vllm-ascend/pull/3334)" +msgstr "" +"支持论文 [flashcomm](https://arxiv.org/pdf/2412.04964) 中的 flashcomm1 和 flashcomm2 特性 [#3004](https://github.com/vllm-project/vllm-ascend/pull/3004) [#3334](https://github.com/vllm-project/vllm-ascend/pull/3334)" + +#: ../../source/user_guide/release_notes.md:569 +msgid "Pooling model, such as bge, reranker, etc. are supported now" +msgstr "现已支持池化模型,例如 bge、reranker 等。" + +#: ../../source/user_guide/release_notes.md:570 +msgid "" +"Official doc has been improved. we refactored the tutorial to make it " +"more clear. The user guide and developer guide is more complete now. " +"We'll keep improving it." +msgstr "" +"官方文档已得到改进。我们重构了教程,使其更加清晰。用户指南和开发者指南现在更加完整。我们将持续改进。" + +#: ../../source/user_guide/release_notes.md:574 +msgid "[Experimental] Mooncake layerwise connector is supported now." +msgstr "[实验性] 现已支持 Mooncake 分层连接器。" + +#: ../../source/user_guide/release_notes.md:575 +msgid "" +"[Experimental] [KV cache " +"pool](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/Design_Documents/KV_Cache_Pool_Guide.html)" +" feature is added" +msgstr "" +"[实验性] 新增 [KV 缓存池](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/Design_Documents/KV_Cache_Pool_Guide.html) 功能" + +#: ../../source/user_guide/release_notes.md:576 +msgid "" +"[Experimental] A new graph mode `xlite` is introduced. It performs good " +"with some models. Following the [official " +"tutorial](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html" +"#using-xlitegraph) to start using it." +msgstr "" +"[实验性] 引入新的图模式 `xlite`。它在某些模型上表现良好。请按照[官方教程](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html#using-xlitegraph)开始使用。" + +#: ../../source/user_guide/release_notes.md:577 +msgid "" +"LLMdatadist kv connector is removed. Please use mooncake connector " +"instead." +msgstr "已移除 LLMdatadist kv 连接器。请改用 mooncake 连接器。" + +#: ../../source/user_guide/release_notes.md:578 +msgid "" +"Ascend scheduler is removed. `--additional-config {\"ascend_scheduler\": " +"{\"enabled\": true}` doesn't work anymore." +msgstr "已移除 Ascend 调度器。`--additional-config {\"ascend_scheduler\": {\"enabled\": true}` 不再生效。" + +#: ../../source/user_guide/release_notes.md:579 +msgid "" +"Torchair graph mode is removed. `--additional-config " +"{\"torchair_graph_config\": {\"enabled\": true}}` doesn't work anymore. " +"Please use aclgraph instead." +msgstr "已移除 Torchair 图模式。`--additional-config {\"torchair_graph_config\": {\"enabled\": true}}` 不再生效。请改用 aclgraph。" + +#: ../../source/user_guide/release_notes.md:580 +msgid "" +"`VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION` env is removed. This feature " +"is stable enough. We enable it by default now." +msgstr "已移除 `VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION` 环境变量。此功能已足够稳定,现默认启用。" + +#: ../../source/user_guide/release_notes.md:581 +msgid "speculative decode method `Ngram` is back now." +msgstr "推测解码方法 `Ngram` 现已恢复。" + +#: ../../source/user_guide/release_notes.md:582 +msgid "" +"msprobe tool is added to help user to check the model accuracy. 
Please " +"follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/performance_and_debug/msprobe_guide.html)" +" to get started." +msgstr "" +"新增 msprobe 工具,帮助用户检查模型精度。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/performance_and_debug/msprobe_guide.html)开始使用。" + +#: ../../source/user_guide/release_notes.md:583 +msgid "" +"msserviceprofiler tool is added to help user to profile the model " +"performance. Please follow the [official doc](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/v0.12.0rc1/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md)" +" to get started." +msgstr "" +"新增 msserviceprofiler 工具,帮助用户分析模型性能。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/blob/v0.12.0rc1/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md)开始使用。" + +#: ../../source/user_guide/release_notes.md:585 +msgid "Upgrade Note" +msgstr "升级说明" + +#: ../../source/user_guide/release_notes.md:587 +msgid "" +"vLLM Ascend self maintained modeling file has been removed. The related " +"python entrypoint is removed as well. So please uninstall the old version" +" of vLLM Ascend in your env before upgrade." +msgstr "vLLM Ascend 自维护的建模文件已被移除,相关的 Python 入口点也已移除。因此,请在升级前卸载环境中的旧版本 vLLM Ascend。" + +#: ../../source/user_guide/release_notes.md:588 +msgid "" +"CANN is upgraded to 8.3.RC2, Pytorch and torch-npu are upgraded to 2.8.0." +" Don't forget to install them." +msgstr "CANN 已升级至 8.3.RC2,Pytorch 和 torch-npu 已升级至 2.8.0。请勿忘记安装。" + +#: ../../source/user_guide/release_notes.md:589 +msgid "Python 3.9 support is dropped to keep the same with vLLM v0.12.0" +msgstr "为与 vLLM v0.12.0 保持一致,已放弃对 Python 3.9 的支持" + +#: ../../source/user_guide/release_notes.md:593 +msgid "" +"DeepSeek 3/3.1 and Qwen3 doesn't work with FULL_DECODE_ONLY graph mode. " +"We'll fix it in next release. [#4990](https://github.com/vllm-project" +"/vllm-ascend/pull/4990)" +msgstr "DeepSeek 3/3.1 和 Qwen3 在 FULL_DECODE_ONLY 图模式下无法工作。我们将在下个版本修复此问题。[#4990](https://github.com/vllm-project/vllm-ascend/pull/4990)" + +#: ../../source/user_guide/release_notes.md:594 +msgid "" +"Hunyuan OCR doesn't work. We'll fix it in the next release. " +"[#4989](https://github.com/vllm-project/vllm-ascend/pull/4989) " +"[#4992](https://github.com/vllm-project/vllm-ascend/pull/4992)" +msgstr "Hunyuan OCR 无法工作。我们将在下个版本修复此问题。[#4989](https://github.com/vllm-project/vllm-ascend/pull/4989) [#4992](https://github.com/vllm-project/vllm-ascend/pull/4992)" + +#: ../../source/user_guide/release_notes.md:595 +msgid "" +"DeepSeek 3.2 doesn't work with chat template. It because that vLLM " +"v0.12.0 doesn't support it. We'll support in the next v0.13.0rc1 version." +msgstr "DeepSeek 3.2 无法与聊天模板协同工作。这是因为 vLLM v0.12.0 不支持它。我们将在下一个 v0.13.0rc1 版本中提供支持。" + +#: ../../source/user_guide/release_notes.md:596 +msgid "" +"DeepSeek 3.2 doesn't work with high concurrency in some case. We'll fix " +"it in next release. [#4996](https://github.com/vllm-project/vllm-" +"ascend/pull/4996)" +msgstr "DeepSeek 3.2 在某些情况下无法在高并发下工作。我们将在下个版本修复此问题。[#4996](https://github.com/vllm-project/vllm-ascend/pull/4996)" + +#: ../../source/user_guide/release_notes.md:597 +msgid "" +"We notice that bf16/fp16 model doesn't perform well, it's mainly because " +"that `VLLM_ASCEND_ENABLE_NZ` is enabled by default. Please set " +"`VLLM_ASCEND_ENABLE_NZ=0` to disable it. We'll add the auto detection " +"mechanism in next release." 
+msgstr "我们注意到 bf16/fp16 模型性能不佳,主要是因为 `VLLM_ASCEND_ENABLE_NZ` 默认启用。请设置 `VLLM_ASCEND_ENABLE_NZ=0` 来禁用它。我们将在下个版本添加自动检测机制。" + +#: ../../source/user_guide/release_notes.md:598 +msgid "" +"speculative decode method `suffix` doesn't work. We'll fix it in next " +"release. You can pick this commit to fix the issue: " +"[#5010](https://github.com/vllm-project/vllm-ascend/pull/5010)" +msgstr "推测解码方法 `suffix` 无法工作。我们将在下个版本修复此问题。你可以选取此提交来修复问题:[#5010](https://github.com/vllm-project/vllm-ascend/pull/5010)" + +#: ../../source/user_guide/release_notes.md:600 +msgid "v0.11.0rc3 - 2025.12.03" +msgstr "v0.11.0rc3 - 2025年12月03日" + +#: ../../source/user_guide/release_notes.md:602 +msgid "" +"This is the third release candidate of v0.11.0 for vLLM Ascend. For " +"quality reasons, we released a new rc before the official release. Thanks" +" for all your feedback. Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.11.0) to get started." +msgstr "这是 vLLM Ascend v0.11.0 的第三个候选发布版本。出于质量考虑,我们在正式发布前发布了新的 rc 版本。感谢您的所有反馈。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。" + +#: ../../source/user_guide/release_notes.md:607 +msgid "" +"Disable NZ weight loader to speed up dense model. Please note that this " +"is a temporary solution. If you find the performance becomes bad, please " +"let us know. We'll keep improving it. [#4495](https://github.com/vllm-" +"project/vllm-ascend/pull/4495)" +msgstr "禁用 NZ 权重加载器以加速稠密模型。请注意,这是一个临时解决方案。如果您发现性能变差,请告知我们。我们将持续改进。[#4495](https://github.com/vllm-project/vllm-ascend/pull/4495)" + +#: ../../source/user_guide/release_notes.md:608 +msgid "" +"mooncake is installed in official docker image now. You can use it " +"directly in container now. [#4506](https://github.com/vllm-project/vllm-" +"ascend/pull/4506)" +msgstr "mooncake 现已安装在官方 Docker 镜像中。您现在可以直接在容器中使用它。[#4506](https://github.com/vllm-project/vllm-ascend/pull/4506)" + +#: ../../source/user_guide/release_notes.md:612 +msgid "" +"Fix an OOM issue for moe models. [#4367](https://github.com/vllm-project" +"/vllm-ascend/pull/4367)" +msgstr "修复了 MoE 模型的 OOM 问题。[#4367](https://github.com/vllm-project/vllm-ascend/pull/4367)" + +#: ../../source/user_guide/release_notes.md:613 +msgid "" +"Fix hang issue of multimodal model when running with DP>1 " +"[#4393](https://github.com/vllm-project/vllm-ascend/pull/4393)" +msgstr "修复了多模态模型在 DP>1 运行时挂起的问题 [#4393](https://github.com/vllm-project/vllm-ascend/pull/4393)" + +#: ../../source/user_guide/release_notes.md:614 +msgid "" +"Fix some bugs for EPLB [#4416](https://github.com/vllm-project/vllm-" +"ascend/pull/4416)" +msgstr "修复了 EPLB 的一些错误 [#4416](https://github.com/vllm-project/vllm-ascend/pull/4416)" + +#: ../../source/user_guide/release_notes.md:615 +msgid "" +"Fix bug for mtp>1 + lm_head_tp>1 case [#4360](https://github.com/vllm-" +"project/vllm-ascend/pull/4360)" +msgstr "修复了 mtp>1 + lm_head_tp>1 情况下的错误 [#4360](https://github.com/vllm-project/vllm-ascend/pull/4360)" + +#: ../../source/user_guide/release_notes.md:616 +msgid "" +"Fix a accuracy issue when running vLLM serve for long time. " +"[#4117](https://github.com/vllm-project/vllm-ascend/pull/4117)" +msgstr "修复了长时间运行 vLLM 服务时的精度问题。[#4117](https://github.com/vllm-project/vllm-ascend/pull/4117)" + +#: ../../source/user_guide/release_notes.md:617 +msgid "" +"Fix a function bug when running qwen2.5 vl under high concurrency. 
" +"[#4553](https://github.com/vllm-project/vllm-ascend/pull/4553)" +msgstr "修复了在高并发下运行 qwen2.5 vl 时的功能错误。[#4553](https://github.com/vllm-project/vllm-ascend/pull/4553)" + +#: ../../source/user_guide/release_notes.md:619 +msgid "v0.11.0rc2 - 2025.11.21" +msgstr "v0.11.0rc2 - 2025年11月21日" + +#: ../../source/user_guide/release_notes.md:621 +msgid "" +"This is the second release candidate of v0.11.0 for vLLM Ascend. In this " +"release, we solved many bugs to improve the quality. Thanks for all your " +"feedback. We'll keep working on bug fix and performance improvement. The " +"v0.11.0 official release will come soon. Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.11.0) to get started." +msgstr "这是 vLLM Ascend v0.11.0 的第二个候选发布版本。在此版本中,我们修复了许多错误以提升质量。感谢您的所有反馈。我们将继续致力于错误修复和性能改进。v0.11.0 正式版即将发布。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。" + +#: ../../source/user_guide/release_notes.md:625 +msgid "" +"CANN is upgraded to 8.3.RC2. [#4332](https://github.com/vllm-project" +"/vllm-ascend/pull/4332)" +msgstr "CANN 已升级至 8.3.RC2。[#4332](https://github.com/vllm-project/vllm-ascend/pull/4332)" + +#: ../../source/user_guide/release_notes.md:626 +msgid "" +"Ngram spec decode method is back now. [#4092](https://github.com/vllm-" +"project/vllm-ascend/pull/4092)" +msgstr "Ngram 推测解码方法现已恢复。[#4092](https://github.com/vllm-project/vllm-ascend/pull/4092)" + +#: ../../source/user_guide/release_notes.md:627 +msgid "" +"The performance of aclgraph is improved by updating default capture size." +" [#4205](https://github.com/vllm-project/vllm-ascend/pull/4205)" +msgstr "通过更新默认捕获大小,提升了 aclgraph 的性能。[#4205](https://github.com/vllm-project/vllm-ascend/pull/4205)" + +#: ../../source/user_guide/release_notes.md:631 +msgid "" +"Speed up vLLM startup time. [#4099](https://github.com/vllm-project/vllm-" +"ascend/pull/4099)" +msgstr "加速了 vLLM 启动时间。[#4099](https://github.com/vllm-project/vllm-ascend/pull/4099)" + +#: ../../source/user_guide/release_notes.md:632 +msgid "" +"Kimi k2 with quantization works now. [#4190](https://github.com/vllm-" +"project/vllm-ascend/pull/4190)" +msgstr "量化后的 Kimi k2 现已可以工作。[#4190](https://github.com/vllm-project/vllm-ascend/pull/4190)" + +#: ../../source/user_guide/release_notes.md:633 +msgid "" +"Fix a bug for qwen3-next. It's more stable now. " +"[#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" +msgstr "修复了 qwen3-next 的一个错误。现在它更稳定了。[#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" + +#: ../../source/user_guide/release_notes.md:637 +msgid "" +"Fix an issue for full decode only mode. Full graph mode is more stable " +"now. [#4106](https://github.com/vllm-project/vllm-ascend/pull/4106) " +"[#4282](https://github.com/vllm-project/vllm-ascend/pull/4282)" +msgstr "修复了仅全解码模式的一个问题。全图模式现在更稳定了。[#4106](https://github.com/vllm-project/vllm-ascend/pull/4106) [#4282](https://github.com/vllm-project/vllm-ascend/pull/4282)" + +#: ../../source/user_guide/release_notes.md:638 +msgid "" +"Fix a allgather ops bug for DeepSeek V3 series models. " +"[#3711](https://github.com/vllm-project/vllm-ascend/pull/3711)" +msgstr "修复了 DeepSeek V3 系列模型的 allgather 操作错误。[#3711](https://github.com/vllm-project/vllm-ascend/pull/3711)" + +#: ../../source/user_guide/release_notes.md:639 +msgid "" +"Fix some bugs for EPLB feature. 
[#4150](https://github.com/vllm-project" +"/vllm-ascend/pull/4150) [#4334](https://github.com/vllm-project/vllm-" +"ascend/pull/4334)" +msgstr "" +"修复了 EPLB 功能的一些错误。 [#4150](https://github.com/vllm-project/vllm-" +"ascend/pull/4150) [#4334](https://github.com/vllm-project/vllm-" +"ascend/pull/4334)" + +#: ../../source/user_guide/release_notes.md:640 +msgid "" +"Fix a bug that vl model doesn't work on x86 machine. " +"[#4285](https://github.com/vllm-project/vllm-ascend/pull/4285)" +msgstr "" +"修复了 VL 模型在 x86 机器上无法工作的错误。 " +"[#4285](https://github.com/vllm-project/vllm-ascend/pull/4285)" + +#: ../../source/user_guide/release_notes.md:641 +msgid "" +"Support ipv6 for prefill disaggregation proxy. Please note that mooncake " +"connector doesn't work with ipv6. We're working on it. " +"[#4242](https://github.com/vllm-project/vllm-ascend/pull/4242)" +msgstr "" +"为预填充解耦代理支持 IPv6。请注意,mooncake 连接器暂不支持 IPv6,我们正在处理中。 " +"[#4242](https://github.com/vllm-project/vllm-ascend/pull/4242)" + +#: ../../source/user_guide/release_notes.md:642 +msgid "" +"Add a check that to ensure EPLB only support w8a8 method for quantization" +" case. [#4315](https://github.com/vllm-project/vllm-ascend/pull/4315)" +msgstr "" +"添加检查以确保 EPLB 在量化场景下仅支持 w8a8 方法。 " +"[#4315](https://github.com/vllm-project/vllm-ascend/pull/4315)" + +#: ../../source/user_guide/release_notes.md:643 +msgid "" +"Add a check that to ensure FLASHCOMM feature doesn't work with vl model. " +"It'll be supported in 2025 Q4 [#4222](https://github.com/vllm-project" +"/vllm-ascend/pull/4222)" +msgstr "" +"添加检查以确保 FLASHCOMM 功能不与 VL 模型同时使用。该功能计划于 2025 年第四季度支持。 " +"[#4222](https://github.com/vllm-project/vllm-ascend/pull/4222)" + +#: ../../source/user_guide/release_notes.md:644 +msgid "" +"Audio required library is installed in container. " +"[#4324](https://github.com/vllm-project/vllm-ascend/pull/4324)" +msgstr "" +"容器中已安装音频所需的库。 " +"[#4324](https://github.com/vllm-project/vllm-ascend/pull/4324)" + +#: ../../source/user_guide/release_notes.md:648 +msgid "" +"Ray + EP doesn't work, if you run vLLM Ascend with ray, please disable " +"expert parallelism. [#4123](https://github.com/vllm-project/vllm-" +"ascend/pull/4123)" +msgstr "" +"Ray 与专家并行 (EP) 不兼容,如果您使用 Ray 运行 vLLM Ascend,请禁用专家并行。 " +"[#4123](https://github.com/vllm-project/vllm-ascend/pull/4123)" + +#: ../../source/user_guide/release_notes.md:649 +msgid "" +"`response_format` parameter is not supported yet. We'll support it soon. " +"[#4175](https://github.com/vllm-project/vllm-ascend/pull/4175)" +msgstr "" +"`response_format` 参数暂不支持,我们将很快提供支持。 " +"[#4175](https://github.com/vllm-project/vllm-ascend/pull/4175)" + +#: ../../source/user_guide/release_notes.md:650 +msgid "" +"cpu bind feature doesn't work for multi instance case(Such as multi DP on" +" one node). We'll fix it in the next release." +msgstr "" +"CPU 绑定功能在多实例场景(例如单节点上多个 DP)下无效。我们将在下一个版本中修复此问题。" + +#: ../../source/user_guide/release_notes.md:652 +msgid "v0.11.0rc1 - 2025.11.10" +msgstr "v0.11.0rc1 - 2025.11.10" + +#: ../../source/user_guide/release_notes.md:654 +msgid "" +"This is the first release candidate of v0.11.0 for vLLM Ascend. Please " +"follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.11.0) to get started. " +"v0.11.0 will be the next official release version of vLLM Ascend. We'll " +"release it in the next few days. Any feedback is welcome to help us to " +"improve v0.11.0." 
+msgstr "" +"这是 vLLM Ascend v0.11.0 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。v0.11.0 将是 vLLM Ascend 的下一个正式发布版本,我们将在未来几天内发布。欢迎任何反馈以帮助我们改进 v0.11.0。" + +#: ../../source/user_guide/release_notes.md:659 +msgid "" +"CANN is upgrade to 8.3.RC1. Torch-npu is upgrade to 2.7.1. " +"[#3945](https://github.com/vllm-project/vllm-ascend/pull/3945) " +"[#3896](https://github.com/vllm-project/vllm-ascend/pull/3896)" +msgstr "" +"CANN 已升级至 8.3.RC1。Torch-npu 已升级至 2.7.1。 " +"[#3945](https://github.com/vllm-project/vllm-ascend/pull/3945) " +"[#3896](https://github.com/vllm-project/vllm-ascend/pull/3896)" + +#: ../../source/user_guide/release_notes.md:660 +msgid "" +"PrefixCache and Chunked Prefill are enabled by default. " +"[#3967](https://github.com/vllm-project/vllm-ascend/pull/3967)" +msgstr "" +"前缀缓存 (PrefixCache) 和分块预填充 (Chunked Prefill) 现已默认启用。 " +"[#3967](https://github.com/vllm-project/vllm-ascend/pull/3967)" + +#: ../../source/user_guide/release_notes.md:661 +msgid "" +"W4A4 quantization is supported now. [#3427](https://github.com/vllm-" +"project/vllm-ascend/pull/3427) Official tutorial is available at " +"[single_npu_qwen3_w4a4]." +msgstr "" +"现已支持 W4A4 量化。 [#3427](https://github.com/vllm-project/vllm-ascend/pull/3427) 官方教程请参阅 [single_npu_qwen3_w4a4]。" + +#: ../../source/user_guide/release_notes.md:665 +msgid "Performance of Qwen3 and Deepseek V3 series models are improved." +msgstr "Qwen3 和 Deepseek V3 系列模型的性能已得到提升。" + +#: ../../source/user_guide/release_notes.md:666 +msgid "" +"Mooncake layerwise connector is supported now [#2602](https://github.com" +"/vllm-project/vllm-ascend/pull/2602). Find tutorial " +"[pd_disaggregation_mooncake_multi_node](https://github.com/vllm-project" +"/vllm-" +"ascend/blob/v0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md)." +msgstr "" +"现已支持 Mooncake 分层连接器 [#2602](https://github.com/vllm-project/vllm-ascend/pull/2602)。教程请参阅 [pd_disaggregation_mooncake_multi_node](https://github.com/vllm-project/vllm-ascend/blob/v0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md)。" + +#: ../../source/user_guide/release_notes.md:667 +msgid "" +"MTP > 1 is supported now. [#2708](https://github.com/vllm-project/vllm-" +"ascend/pull/2708)" +msgstr "" +"现已支持 MTP > 1。 [#2708](https://github.com/vllm-project/vllm-ascend/pull/2708)" + +#: ../../source/user_guide/release_notes.md:668 +msgid "" +"[Experimental] Graph mode `FULL_DECODE_ONLY` is supported now! And `FULL`" +" will be landing in the next few weeks. [#2128](https://github.com/vllm-" +"project/vllm-ascend/pull/2128)" +msgstr "" +"[实验性] 现已支持图模式 `FULL_DECODE_ONLY`!`FULL` 模式将在未来几周内推出。 [#2128](https://github.com/vllm-project/vllm-ascend/pull/2128)" + +#: ../../source/user_guide/release_notes.md:669 +msgid "" +"Pooling models, such as bge-m3, are supported now. " +"[#3171](https://github.com/vllm-project/vllm-ascend/pull/3171)" +msgstr "" +"现已支持池化模型,例如 bge-m3。 " +"[#3171](https://github.com/vllm-project/vllm-ascend/pull/3171)" + +#: ../../source/user_guide/release_notes.md:673 +msgid "" +"Refactor the MOE module to make it clearer and easier to understand and " +"the performance has improved in both quantitative and non-quantitative " +"scenarios." +msgstr "" +"重构了 MOE 模块,使其更清晰易懂,并且在量化和非量化场景下的性能均有所提升。" + +#: ../../source/user_guide/release_notes.md:674 +msgid "" +"Refactor model register module to make it easier to maintain. We'll " +"remove this module in Q4 2025. 
[#3004](https://github.com/vllm-project" +"/vllm-ascend/pull/3004)" +msgstr "" +"重构了模型注册模块以方便维护。我们将在 2025 年第四季度移除该模块。 [#3004](https://github.com/vllm-project/vllm-ascend/pull/3004)" + +#: ../../source/user_guide/release_notes.md:675 +msgid "" +"Torchair is deprecated. We'll remove it once the performance of ACL Graph" +" is good enough. The deadline is Q1 2026." +msgstr "" +"Torchair 已弃用。一旦 ACL Graph 的性能足够好,我们将移除它。截止日期为 2026 年第一季度。" + +#: ../../source/user_guide/release_notes.md:676 +msgid "LLMDatadist KV Connector is deprecated. We'll remove it in Q1 2026." +msgstr "LLMDatadist KV 连接器已弃用。我们将在 2026 年第一季度移除它。" + +#: ../../source/user_guide/release_notes.md:677 +msgid "" +"Refactor the linear module to support features flashcomm1 and flashcomm2 " +"in paper [flashcomm](https://arxiv.org/pdf/2412.04964) " +"[#3004](https://github.com/vllm-project/vllm-ascend/pull/3004) " +"[#3334](https://github.com/vllm-project/vllm-ascend/pull/3334)" +msgstr "" +"重构了线性模块以支持论文 [flashcomm](https://arxiv.org/pdf/2412.04964) 中的 flashcomm1 和 flashcomm2 特性。 " +"[#3004](https://github.com/vllm-project/vllm-ascend/pull/3004) " +"[#3334](https://github.com/vllm-project/vllm-ascend/pull/3334)" + +#: ../../source/user_guide/release_notes.md:679 +msgid "Known issue" +msgstr "已知问题" + +#: ../../source/user_guide/release_notes.md:681 +msgid "" +"The memory may be leaked and the service may be stuck after long time " +"serving. This is a bug from torch-npu, we'll upgrade and fix it soon." +msgstr "" +"长时间服务后可能出现内存泄漏和服务卡住的问题。这是 torch-npu 的一个错误,我们将尽快升级并修复。" + +#: ../../source/user_guide/release_notes.md:682 +msgid "" +"The accuracy of qwen2.5 VL is not very good. This is a bug lead by CANN, " +"we fix it soon." +msgstr "" +"qwen2.5 VL 模型的准确性不佳。这是由 CANN 导致的一个错误,我们将尽快修复。" + +#: ../../source/user_guide/release_notes.md:683 +msgid "" +"For long sequence input case, there is no response sometimes and the kv " +"cache usage is become higher. This is a bug for scheduler. We are working" +" on it." +msgstr "" +"对于长序列输入场景,有时会没有响应,并且 KV 缓存使用率会变高。这是调度器的一个错误,我们正在处理中。" + +#: ../../source/user_guide/release_notes.md:684 +msgid "" +"Qwen2-audio doesn't work by default, we're fixing it. Temporary solution " +"is to set `--gpu-memory-utilization` to a suitable value, such as 0.8." +msgstr "" +"Qwen2-audio 默认无法工作,我们正在修复。临时解决方案是将 `--gpu-memory-utilization` 设置为合适的值,例如 0.8。" + +#: ../../source/user_guide/release_notes.md:685 +msgid "" +"When running Qwen3-Next with expert parallel enabled, please set " +"`HCCL_BUFFSIZE` environment variable to a suitable value, such as 1024." +msgstr "" +"当启用专家并行运行 Qwen3-Next 时,请将 `HCCL_BUFFSIZE` 环境变量设置为合适的值,例如 1024。" + +#: ../../source/user_guide/release_notes.md:686 +msgid "" +"The accuracy of DeepSeek3.2 with aclgraph is not correct. Temporary " +"solution is to set `cudagraph_capture_sizes` to a suitable value " +"depending on the batch size for the input." +msgstr "" +"使用 aclgraph 时 DeepSeek3.2 的准确性不正确。临时解决方案是根据输入的批次大小将 `cudagraph_capture_sizes` 设置为合适的值。" + +#: ../../source/user_guide/release_notes.md:688 +msgid "v0.11.0rc0 - 2025.09.30" +msgstr "v0.11.0rc0 - 2025.09.30" + +#: ../../source/user_guide/release_notes.md:690 +msgid "" +"This is the special release candidate of v0.11.0 for vLLM Ascend. Please " +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.11.0rc0) to get started." 
+msgstr "" +"这是 vLLM Ascend v0.11.0 的特殊候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.11.0rc0)开始使用。" + +#: ../../source/user_guide/release_notes.md:694 +msgid "" +"DeepSeek V3.2 is supported now. [#3270](https://github.com/vllm-project" +"/vllm-ascend/pull/3270)" +msgstr "" +"现已支持 DeepSeek V3.2。 [#3270](https://github.com/vllm-project/vllm-ascend/pull/3270)" + +#: ../../source/user_guide/release_notes.md:695 +msgid "" +"Qwen3-vl is supported now. [#3103](https://github.com/vllm-project/vllm-" +"ascend/pull/3103)" +msgstr "" +"现已支持 Qwen3-vl。 [#3103](https://github.com/vllm-project/vllm-ascend/pull/3103)" + +#: ../../source/user_guide/release_notes.md:699 +msgid "" +"DeepSeek works with aclgraph now. [#2707](https://github.com/vllm-project" +"/vllm-ascend/pull/2707)" +msgstr "" +"DeepSeek 现已支持与 aclgraph 协同工作。 [#2707](https://github.com/vllm-project/vllm-ascend/pull/2707)" + +#: ../../source/user_guide/release_notes.md:700 +msgid "" +"MTP works with aclgraph now. [#2932](https://github.com/vllm-project" +"/vllm-ascend/pull/2932)" +msgstr "" +"MTP 现已支持与 aclgraph 协同工作。 [#2932](https://github.com/vllm-project/vllm-ascend/pull/2932)" + +#: ../../source/user_guide/release_notes.md:701 +msgid "" +"EPLB is supported now. [#2956](https://github.com/vllm-project/vllm-" +"ascend/pull/2956)" +msgstr "" +"现已支持 EPLB。 [#2956](https://github.com/vllm-project/vllm-ascend/pull/2956)" + +#: ../../source/user_guide/release_notes.md:702 +msgid "" +"Mooncacke store kvcache connector is supported now. " +"[#2913](https://github.com/vllm-project/vllm-ascend/pull/2913)" +msgstr "" +"现已支持 Mooncake 存储 KV 缓存连接器。 " +"[#2913](https://github.com/vllm-project/vllm-ascend/pull/2913)" + +#: ../../source/user_guide/release_notes.md:703 +msgid "" +"CPU offload connector is supported now. [#1659](https://github.com/vllm-" +"project/vllm-ascend/pull/1659)" +msgstr "" +"现已支持 CPU 卸载连接器。 [#1659](https://github.com/vllm-project/vllm-ascend/pull/1659)" + +#: ../../source/user_guide/release_notes.md:708 +msgid "" +"Fixed a lot of bugs introduced in v0.10.2 by Qwen3-next. " +"[#2964](https://github.com/vllm-project/vllm-ascend/pull/2964) " +"[#2781](https://github.com/vllm-project/vllm-ascend/pull/2781) " +"[#3070](https://github.com/vllm-project/vllm-ascend/pull/3070) " +"[#3113](https://github.com/vllm-project/vllm-ascend/pull/3113)" +msgstr "" +"修复了 Qwen3-next 在 v0.10.2 版本中引入的大量错误。" +"[#2964](https://github.com/vllm-project/vllm-ascend/pull/2964) " +"[#2781](https://github.com/vllm-project/vllm-ascend/pull/2781) " +"[#3070](https://github.com/vllm-project/vllm-ascend/pull/3070) " +"[#3113](https://github.com/vllm-project/vllm-ascend/pull/3113)" + +#: ../../source/user_guide/release_notes.md:709 +msgid "" +"The LoRA feature is back now. [#3044](https://github.com/vllm-project" +"/vllm-ascend/pull/3044)" +msgstr "" +"LoRA 功能现已恢复。 [#3044](https://github.com/vllm-project/vllm-ascend/pull/3044)" + +#: ../../source/user_guide/release_notes.md:710 +msgid "" +"Eagle3 spec decode method is back now. [#2949](https://github.com/vllm-" +"project/vllm-ascend/pull/2949)" +msgstr "" +"Eagle3 推测解码方法现已恢复。 [#2949](https://github.com/vllm-project/vllm-ascend/pull/2949)" + +#: ../../source/user_guide/release_notes.md:712 +msgid "v0.10.2rc1 - 2025.09.16" +msgstr "v0.10.2rc1 - 2025.09.16" + +#: ../../source/user_guide/release_notes.md:714 +msgid "" +"This is the 1st release candidate of v0.10.2 for vLLM Ascend. Please " +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.10.2rc1) to get started." 
+msgstr "" +"这是 vLLM Ascend v0.10.2 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.10.2rc1)开始使用。" + +#: ../../source/user_guide/release_notes.md:718 +msgid "" +"Added support for Qwen3-Next. Please note that the expert parallel and " +"MTP features do not work with this release. We will be adding support for" +" them soon. Follow the [official guide](https://github.com/vllm-project" +"/vllm-" +"ascend/blob/v0.10.2rc1/docs/source/tutorials/multi_npu_qwen3_next.md) to " +"get started. [#2917](https://github.com/vllm-project/vllm-" +"ascend/pull/2917)" +msgstr "" +"新增对 Qwen3-Next 的支持。请注意,专家并行和 MTP 功能在此版本中不可用。我们将很快添加对它们的支持。请按照[官方指南](https://github.com/vllm-project/vllm-ascend/blob/v0.10.2rc1/docs/source/tutorials/multi_npu_qwen3_next.md)开始使用。 [#2917](https://github.com/vllm-project/vllm-ascend/pull/2917)" + +#: ../../source/user_guide/release_notes.md:719 +msgid "" +"Added quantization support for aclgraph [#2841](https://github.com/vllm-" +"project/vllm-ascend/pull/2841)" +msgstr "" +"为 aclgraph 添加量化支持 [#2841](https://github.com/vllm-project/vllm-ascend/pull/2841)" + +#: ../../source/user_guide/release_notes.md:723 +msgid "" +"Aclgraph now works with Ray backend. [#2589](https://github.com/vllm-" +"project/vllm-ascend/pull/2589)" +msgstr "" +"Aclgraph 现在可与 Ray 后端协同工作。 [#2589](https://github.com/vllm-project/vllm-ascend/pull/2589)" + +#: ../../source/user_guide/release_notes.md:724 +msgid "" +"MTP now works with the token > 1. [#2708](https://github.com/vllm-project" +"/vllm-ascend/pull/2708)" +msgstr "" +"MTP 现在支持 token > 1 的情况。 [#2708](https://github.com/vllm-project/vllm-ascend/pull/2708)" + +#: ../../source/user_guide/release_notes.md:725 +msgid "" +"Qwen2.5 VL now works with quantization. [#2778](https://github.com/vllm-" +"project/vllm-ascend/pull/2778)" +msgstr "" +"Qwen2.5 VL 现在支持量化。 [#2778](https://github.com/vllm-project/vllm-ascend/pull/2778)" + +#: ../../source/user_guide/release_notes.md:726 +msgid "" +"Improved the performance with async scheduler enabled. " +"[#2783](https://github.com/vllm-project/vllm-ascend/pull/2783)" +msgstr "" +"启用了异步调度器后,性能得到提升。 [#2783](https://github.com/vllm-project/vllm-ascend/pull/2783)" + +#: ../../source/user_guide/release_notes.md:727 +msgid "" +"Fixed the performance regression with non MLA model when using default " +"scheduler. [#2894](https://github.com/vllm-project/vllm-ascend/pull/2894)" +msgstr "" +"修复了使用默认调度器时非 MLA 模型的性能回归问题。 [#2894](https://github.com/vllm-project/vllm-ascend/pull/2894)" + +#: ../../source/user_guide/release_notes.md:731 +msgid "" +"The performance of W8A8 quantization is improved. " +"[#2275](https://github.com/vllm-project/vllm-ascend/pull/2275)" +msgstr "" +"W8A8 量化的性能得到提升。 [#2275](https://github.com/vllm-project/vllm-ascend/pull/2275)" + +#: ../../source/user_guide/release_notes.md:732 +msgid "" +"The performance is improved for moe models. [#2689](https://github.com" +"/vllm-project/vllm-ascend/pull/2689) [#2842](https://github.com/vllm-" +"project/vllm-ascend/pull/2842)" +msgstr "" +"MoE 模型的性能得到提升。 [#2689](https://github.com/vllm-project/vllm-ascend/pull/2689) [#2842](https://github.com/vllm-project/vllm-ascend/pull/2842)" + +#: ../../source/user_guide/release_notes.md:733 +msgid "" +"Fixed resources limit error when apply speculative decoding and aclgraph." 
+" [#2472](https://github.com/vllm-project/vllm-ascend/pull/2472)" +msgstr "" +"修复了应用推测解码和 aclgraph 时的资源限制错误。 [#2472](https://github.com/vllm-project/vllm-ascend/pull/2472)" + +#: ../../source/user_guide/release_notes.md:734 +msgid "" +"Fixed the git config error in Docker images. [#2746](https://github.com" +"/vllm-project/vllm-ascend/pull/2746)" +msgstr "" +"修复了 Docker 镜像中的 git 配置错误。 [#2746](https://github.com/vllm-project/vllm-ascend/pull/2746)" + +#: ../../source/user_guide/release_notes.md:735 +msgid "" +"Fixed the sliding windows attention bug with prefill. " +"[#2758](https://github.com/vllm-project/vllm-ascend/pull/2758)" +msgstr "" +"修复了预填充阶段的滑动窗口注意力错误。 [#2758](https://github.com/vllm-project/vllm-ascend/pull/2758)" + +#: ../../source/user_guide/release_notes.md:736 +msgid "" +"The official doc for Prefill-Decode Disaggregation with Qwen3 is added. " +"[#2751](https://github.com/vllm-project/vllm-ascend/pull/2751)" +msgstr "" +"新增了关于 Qwen3 预填充-解码解耦的官方文档。 [#2751](https://github.com/vllm-project/vllm-ascend/pull/2751)" + +#: ../../source/user_guide/release_notes.md:737 +msgid "" +"`VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` env works again. " +"[#2740](https://github.com/vllm-project/vllm-ascend/pull/2740)" +msgstr "" +"环境变量 `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` 再次生效。 [#2740](https://github.com/vllm-project/vllm-ascend/pull/2740)" + +#: ../../source/user_guide/release_notes.md:738 +msgid "" +"A new improvement for oproj in deepseek is added. Set " +"`oproj_tensor_parallel_size` to enable this feature. " +"[#2167](https://github.com/vllm-project/vllm-ascend/pull/2167)" +msgstr "" +"为 DeepSeek 中的 oproj 添加了一项新改进。设置 `oproj_tensor_parallel_size` 以启用此功能。 [#2167](https://github.com/vllm-project/vllm-ascend/pull/2167)" + +#: ../../source/user_guide/release_notes.md:739 +msgid "" +"Fix a bug that deepseek with torchair doesn't work as expect when " +"`graph_batch_sizes` is set. [#2760](https://github.com/vllm-project/vllm-" +"ascend/pull/2760)" +msgstr "" +"修复了一个 bug:当设置了 `graph_batch_sizes` 时,使用 torchair 的 deepseek 模型未按预期工作。 [#2760](https://github.com/vllm-project/vllm-ascend/pull/2760)" + +#: ../../source/user_guide/release_notes.md:740 +msgid "" +"Avoid duplicate generation of sin_cos_cache in rope when kv_seqlen > 4k. " +"[#2744](https://github.com/vllm-project/vllm-ascend/pull/2744)" +msgstr "" +"当 kv_seqlen > 4k 时,避免在 rope 中重复生成 sin_cos_cache。 [#2744](https://github.com/vllm-project/vllm-ascend/pull/2744)" + +#: ../../source/user_guide/release_notes.md:741 +msgid "" +"The performance of Qwen3 dense model is improved with flashcomm_v1. Set " +"`VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE=1` and " +"`VLLM_ASCEND_ENABLE_FLASHCOMM=1` to enable it. [#2779](https://github.com" +"/vllm-project/vllm-ascend/pull/2779)" +msgstr "" +"Qwen3 稠密模型的性能通过 flashcomm_v1 得到提升。设置 `VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE=1` 和 `VLLM_ASCEND_ENABLE_FLASHCOMM=1` 以启用此功能。 [#2779](https://github.com/vllm-project/vllm-ascend/pull/2779)" + +#: ../../source/user_guide/release_notes.md:742 +msgid "" +"The performance of Qwen3 dense model is improved with prefetch feature. " +"Set `VLLM_ASCEND_ENABLE_PREFETCH_MLP=1` to enable it. " +"[#2816](https://github.com/vllm-project/vllm-ascend/pull/2816)" +msgstr "" +"Qwen3 稠密模型的性能通过预取功能得到提升。设置 `VLLM_ASCEND_ENABLE_PREFETCH_MLP=1` 以启用此功能。 [#2816](https://github.com/vllm-project/vllm-ascend/pull/2816)" + +#: ../../source/user_guide/release_notes.md:743 +msgid "" +"The performance of Qwen3 MoE model is improved with rope ops update. 
" +"[#2571](https://github.com/vllm-project/vllm-ascend/pull/2571)" +msgstr "" +"Qwen3 MoE 模型的性能通过 rope 算子更新得到提升。 [#2571](https://github.com/vllm-project/vllm-ascend/pull/2571)" + +#: ../../source/user_guide/release_notes.md:744 +msgid "" +"Fix the weight load error for RLHF case. [#2756](https://github.com/vllm-" +"project/vllm-ascend/pull/2756)" +msgstr "" +"修复了 RLHF 场景下的权重加载错误。 [#2756](https://github.com/vllm-project/vllm-ascend/pull/2756)" + +#: ../../source/user_guide/release_notes.md:745 +msgid "" +"Add warm_up_atb step to speed up the inference. " +"[#2823](https://github.com/vllm-project/vllm-ascend/pull/2823)" +msgstr "" +"添加 warm_up_atb 步骤以加速推理。 [#2823](https://github.com/vllm-project/vllm-ascend/pull/2823)" + +#: ../../source/user_guide/release_notes.md:746 +msgid "" +"Fixed the aclgraph steam error for moe model. [#2827](https://github.com" +"/vllm-project/vllm-ascend/pull/2827)" +msgstr "" +"修复了 MoE 模型的 aclgraph 流错误。 [#2827](https://github.com/vllm-project/vllm-ascend/pull/2827)" + +#: ../../source/user_guide/release_notes.md:750 +msgid "" +"The server will hang when running Prefill Decode Disaggregation with " +"different TP size for P and D. It's fixed by [vLLM " +"commit](https://github.com/vllm-project/vllm/pull/23917) which is not " +"included in v0.10.2. You can pick this commit to fix the issue." +msgstr "" +"当预填充和解码阶段使用不同的 TP 大小时,运行预填充-解码解耦会导致服务器挂起。此问题已由 [vLLM commit](https://github.com/vllm-project/vllm/pull/23917) 修复,但该提交未包含在 v0.10.2 中。您可以选取此提交来修复该问题。" + +#: ../../source/user_guide/release_notes.md:751 +msgid "" +"The HBM usage of Qwen3-Next is higher than expected. It is a [known " +"issue](https://github.com/vllm-project/vllm-ascend/issues/2884) and we " +"are working on it. You can set `max_model_len` and " +"`gpu_memory_utilization` to suitable value based on your parallel " +"configuration to avoid oom error." +msgstr "" +"Qwen3-Next 的 HBM 使用率高于预期。这是一个[已知问题](https://github.com/vllm-project/vllm-ascend/issues/2884),我们正在处理中。您可以根据您的并行配置,将 `max_model_len` 和 `gpu_memory_utilization` 设置为合适的值以避免内存溢出错误。" + +#: ../../source/user_guide/release_notes.md:752 +msgid "" +"We notice that LoRA does not work with this release due to the refactor " +"of KV cache. We will fix it soon. [2941](https://github.com/vllm-project" +"/vllm-ascend/issues/2941)" +msgstr "" +"我们注意到,由于 KV 缓存的重新设计,LoRA 在此版本中无法工作。我们将尽快修复此问题。 [2941](https://github.com/vllm-project/vllm-ascend/issues/2941)" + +#: ../../source/user_guide/release_notes.md:753 +msgid "" +"Please do not enable chunked prefill with prefix cache when running with " +"Ascend scheduler. The performance and accuracy is not good/correct. " +"[#2943](https://github.com/vllm-project/vllm-ascend/issues/2943)" +msgstr "" +"在使用 Ascend 调度器运行时,请不要启用带有前缀缓存的分块预填充。其性能和准确性不佳/不正确。 [#2943](https://github.com/vllm-project/vllm-ascend/issues/2943)" + +#: ../../source/user_guide/release_notes.md:755 +msgid "v0.10.1rc1 - 2025.09.04" +msgstr "v0.10.1rc1 - 2025.09.04" + +#: ../../source/user_guide/release_notes.md:757 +msgid "" +"This is the 1st release candidate of v0.10.1 for vLLM Ascend. Please " +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.10.1rc1) to get started." +msgstr "" +"这是 vLLM Ascend v0.10.1 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.10.1rc1)开始使用。" + +#: ../../source/user_guide/release_notes.md:761 +msgid "" +"LoRA Performance improved much through adding Custom Kernels by China " +"Merchants Bank. 
[#2325](https://github.com/vllm-project/vllm-" +"ascend/pull/2325)" +msgstr "" +"通过招商银行添加的自定义内核,LoRA 性能得到大幅提升。 [#2325](https://github.com/vllm-project/vllm-ascend/pull/2325)" + +#: ../../source/user_guide/release_notes.md:762 +msgid "" +"Support Mooncake TransferEngine for kv cache register and pull_blocks " +"style disaggregate prefill implementation. [#1568](https://github.com" +"/vllm-project/vllm-ascend/pull/1568)" +msgstr "" +"支持使用 Mooncake TransferEngine 进行 kv 缓存注册和 pull_blocks 风格的解耦预填充实现。 [#1568](https://github.com/vllm-project/vllm-ascend/pull/1568)" + +#: ../../source/user_guide/release_notes.md:763 +msgid "" +"Support capture custom ops into aclgraph now. [#2113](https://github.com" +"/vllm-project/vllm-ascend/pull/2113)" +msgstr "" +"现在支持将自定义算子捕获到 aclgraph 中。 [#2113](https://github.com/vllm-project/vllm-ascend/pull/2113)" + +#: ../../source/user_guide/release_notes.md:767 +msgid "" +"Added MLP tensor parallel to improve performance, but note that this will" +" increase memory usage. [#2120](https://github.com/vllm-project/vllm-" +"ascend/pull/2120)" +msgstr "" +"新增了 MLP 张量并行以提升性能,但请注意这会增加内存使用量。[#2120](https://github.com/vllm-project/vllm-" +"ascend/pull/2120)" + +#: ../../source/user_guide/release_notes.md:768 +msgid "" +"openEuler is upgraded to 24.03. [#2631](https://github.com/vllm-project" +"/vllm-ascend/pull/2631)" +msgstr "" +"openEuler 已升级至 24.03 版本。[#2631](https://github.com/vllm-project/vllm-" +"ascend/pull/2631)" + +#: ../../source/user_guide/release_notes.md:769 +msgid "" +"Added custom lmhead tensor parallel to achieve reduced memory consumption" +" and improved TPOT performance. [#2309](https://github.com/vllm-project" +"/vllm-ascend/pull/2309)" +msgstr "" +"新增了自定义 lmhead 张量并行,以实现更低的内存消耗和更高的 TPOT 性能。[#2309](https://github.com/vllm-project/vllm-" +"ascend/pull/2309)" + +#: ../../source/user_guide/release_notes.md:770 +msgid "" +"Qwen3 MoE/Qwen2.5 support torchair graph now. [#2403](https://github.com" +"/vllm-project/vllm-ascend/pull/2403)" +msgstr "" +"Qwen3 MoE/Qwen2.5 现已支持 torchair 图模式。[#2403](https://github.com/vllm-project/vllm-" +"ascend/pull/2403)" + +#: ../../source/user_guide/release_notes.md:771 +msgid "" +"Support Sliding Window Attention with AscendSceduler, thus fixing Gemma3 " +"accuracy issue. [#2528](https://github.com/vllm-project/vllm-" +"ascend/pull/2528)" +msgstr "" +"支持 AscendScheduler 的滑动窗口注意力机制,从而修复了 Gemma3 的精度问题。[#2528](https://github.com/vllm-project/vllm-" +"ascend/pull/2528)" + +#: ../../source/user_guide/release_notes.md:775 +#: ../../source/user_guide/release_notes.md:906 +msgid "Bug fixes:" +msgstr "漏洞修复:" + +#: ../../source/user_guide/release_notes.md:776 +msgid "" +"Updated the graph capture size calculation, somehow alleviated the " +"problem that NPU stream not enough in some scenarios. " +"[#2511](https://github.com/vllm-project/vllm-ascend/pull/2511)" +msgstr "" +"更新了图捕获大小的计算方式,在一定程度上缓解了某些场景下 NPU 流不足的问题。[#2511](https://github.com/vllm-project/vllm-" +"ascend/pull/2511)" + +#: ../../source/user_guide/release_notes.md:777 +msgid "" +"Fixed bugs and refactor cached mask generation logic. " +"[#2442](https://github.com/vllm-project/vllm-ascend/pull/2442)" +msgstr "" +"修复了漏洞并重构了缓存掩码生成逻辑。[#2442](https://github.com/vllm-project/vllm-" +"ascend/pull/2442)" + +#: ../../source/user_guide/release_notes.md:778 +msgid "" +"Fixed the nz format does not work in quantization scenarios. 
" +"[#2549](https://github.com/vllm-project/vllm-ascend/pull/2549)" +msgstr "" +"修复了 nz 格式在量化场景下无效的问题。[#2549](https://github.com/vllm-project/vllm-" +"ascend/pull/2549)" + +#: ../../source/user_guide/release_notes.md:779 +msgid "" +"Fixed the accuracy issue on Qwen series caused by enabling " +"`enable_shared_pert_dp` by default. [#2457](https://github.com/vllm-" +"project/vllm-ascend/pull/2457)" +msgstr "" +"修复了因默认启用 `enable_shared_pert_dp` 导致的 Qwen 系列模型的精度问题。[#2457](https://github.com/vllm-project/vllm-" +"ascend/pull/2457)" + +#: ../../source/user_guide/release_notes.md:780 +msgid "" +"Fixed the accuracy issue on models whose rope dim is not equal to head " +"dim, e.g., GLM4.5. [#2601](https://github.com/vllm-project/vllm-" +"ascend/pull/2601)" +msgstr "" +"修复了在 rope 维度不等于头维度的模型(例如 GLM4.5)上的精度问题。[#2601](https://github.com/vllm-project/vllm-" +"ascend/pull/2601)" + +#: ../../source/user_guide/release_notes.md:781 +#: ../../source/user_guide/release_notes.md:911 +msgid "Performance improved through a lot of prs:" +msgstr "通过大量 PR 提升了性能:" + +#: ../../source/user_guide/release_notes.md:782 +msgid "" +"Removed torch.cat and replaced it with List[0]. " +"[#2153](https://github.com/vllm-project/vllm-ascend/pull/2153)" +msgstr "" +"移除了 torch.cat 并用 List[0] 替代。[#2153](https://github.com/vllm-project/vllm-" +"ascend/pull/2153)" + +#: ../../source/user_guide/release_notes.md:783 +msgid "" +"Converted the format of gmm to nz. [#2474](https://github.com/vllm-" +"project/vllm-ascend/pull/2474)" +msgstr "" +"将 gmm 的格式转换为 nz。[#2474](https://github.com/vllm-project/vllm-" +"ascend/pull/2474)" + +#: ../../source/user_guide/release_notes.md:784 +msgid "" +"Optimized parallel strategies to reduce communication overhead. " +"[#2198](https://github.com/vllm-project/vllm-ascend/pull/2198)" +msgstr "" +"优化了并行策略以减少通信开销。[#2198](https://github.com/vllm-project/vllm-" +"ascend/pull/2198)" + +#: ../../source/user_guide/release_notes.md:785 +msgid "" +"Optimized reject sampler in greedy situation. [#2137](https://github.com" +"/vllm-project/vllm-ascend/pull/2137)" +msgstr "" +"优化了贪婪模式下的拒绝采样器。[#2137](https://github.com/vllm-project/vllm-" +"ascend/pull/2137)" + +#: ../../source/user_guide/release_notes.md:786 +msgid "A batch of refactoring PRs to enhance the code architecture:" +msgstr "一系列重构 PR 以增强代码架构:" + +#: ../../source/user_guide/release_notes.md:787 +msgid "" +"Refactor on MLA. [#2465](https://github.com/vllm-project/vllm-" +"ascend/pull/2465)" +msgstr "" +"重构了 MLA。[#2465](https://github.com/vllm-project/vllm-" +"ascend/pull/2465)" + +#: ../../source/user_guide/release_notes.md:788 +msgid "" +"Refactor on torchair fused_moe. [#2438](https://github.com/vllm-project" +"/vllm-ascend/pull/2438)" +msgstr "" +"重构了 torchair fused_moe。[#2438](https://github.com/vllm-project/vllm-" +"ascend/pull/2438)" + +#: ../../source/user_guide/release_notes.md:789 +msgid "" +"Refactor on allgather/mc2-related fused_experts. " +"[#2369](https://github.com/vllm-project/vllm-ascend/pull/2369)" +msgstr "" +"重构了 allgather/mc2 相关的 fused_experts。[#2369](https://github.com/vllm-project/vllm-" +"ascend/pull/2369)" + +#: ../../source/user_guide/release_notes.md:790 +msgid "" +"Refactor on torchair model runner. [#2208](https://github.com/vllm-" +"project/vllm-ascend/pull/2208)" +msgstr "" +"重构了 torchair model runner。[#2208](https://github.com/vllm-project/vllm-" +"ascend/pull/2208)" + +#: ../../source/user_guide/release_notes.md:791 +msgid "" +"Refactor on CI. 
[#2276](https://github.com/vllm-project/vllm-" +"ascend/pull/2276)" +msgstr "" +"重构了 CI。[#2276](https://github.com/vllm-project/vllm-" +"ascend/pull/2276)" + +#: ../../source/user_guide/release_notes.md:792 +#: ../../source/user_guide/release_notes.md:926 +msgid "Parameters changes:" +msgstr "参数变更:" + +#: ../../source/user_guide/release_notes.md:793 +msgid "" +"Added `lmhead_tensor_parallel_size` in `additional_config`, set it to " +"enable lmhead tensor parallel. [#2309](https://github.com/vllm-project" +"/vllm-ascend/pull/2309)" +msgstr "" +"在 `additional_config` 中新增了 `lmhead_tensor_parallel_size` 参数,设置该参数以启用 lmhead 张量并行。[#2309](https://github.com/vllm-project/vllm-" +"ascend/pull/2309)" + +#: ../../source/user_guide/release_notes.md:794 +msgid "" +"Some unused environment variables `HCCN_PATH`, `PROMPT_DEVICE_ID`, " +"`DECODE_DEVICE_ID`, `LLMDATADIST_COMM_PORT` and " +"`LLMDATADIST_SYNC_CACHE_WAIT_TIME` are removed. " +"[#2448](https://github.com/vllm-project/vllm-ascend/pull/2448)" +msgstr "" +"移除了未使用的环境变量 `HCCN_PATH`、`PROMPT_DEVICE_ID`、`DECODE_DEVICE_ID`、`LLMDATADIST_COMM_PORT` 和 `LLMDATADIST_SYNC_CACHE_WAIT_TIME`。[#2448](https://github.com/vllm-project/vllm-" +"ascend/pull/2448)" + +#: ../../source/user_guide/release_notes.md:795 +msgid "" +"Environment variable `VLLM_LLMDD_RPC_PORT` is renamed to " +"`VLLM_ASCEND_LLMDD_RPC_PORT` now. [#2450](https://github.com/vllm-project" +"/vllm-ascend/pull/2450)" +msgstr "" +"环境变量 `VLLM_LLMDD_RPC_PORT` 现已重命名为 `VLLM_ASCEND_LLMDD_RPC_PORT`。[#2450](https://github.com/vllm-project/vllm-" +"ascend/pull/2450)" + +#: ../../source/user_guide/release_notes.md:796 +msgid "" +"Added `VLLM_ASCEND_ENABLE_MLP_OPTIMIZE` in environment variables, Whether" +" to enable mlp optimize when tensor parallel is enabled. This feature " +"provides better performance in eager mode. [#2120](https://github.com" +"/vllm-project/vllm-ascend/pull/2120)" +msgstr "" +"在环境变量中新增了 `VLLM_ASCEND_ENABLE_MLP_OPTIMIZE`,用于控制在启用张量并行时是否启用 MLP 优化。此功能在 eager 模式下能提供更好的性能。[#2120](https://github.com/vllm-project/vllm-" +"ascend/pull/2120)" + +#: ../../source/user_guide/release_notes.md:797 +msgid "" +"Removed `MOE_ALL2ALL_BUFFER` and `VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ` in " +"environment variables. [#2612](https://github.com/vllm-project/vllm-" +"ascend/pull/2612)" +msgstr "" +"移除了环境变量中的 `MOE_ALL2ALL_BUFFER` 和 `VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ`。[#2612](https://github.com/vllm-project/vllm-" +"ascend/pull/2612)" + +#: ../../source/user_guide/release_notes.md:798 +msgid "" +"Added `enable_prefetch` in `additional_config`, Whether to enable weight " +"prefetch. [#2465](https://github.com/vllm-project/vllm-ascend/pull/2465)" +msgstr "" +"在 `additional_config` 中新增了 `enable_prefetch` 参数,用于控制是否启用权重预取。[#2465](https://github.com/vllm-project/vllm-" +"ascend/pull/2465)" + +#: ../../source/user_guide/release_notes.md:799 +msgid "" +"Added `mode` in `additional_config.torchair_graph_config`, When using " +"reduce-overhead mode for torchair, mode needs to be set. " +"[#2461](https://github.com/vllm-project/vllm-ascend/pull/2461)" +msgstr "" +"在 `additional_config.torchair_graph_config` 中新增了 `mode` 参数,当使用 torchair 的 reduce-overhead 模式时,需要设置此参数。[#2461](https://github.com/vllm-project/vllm-" +"ascend/pull/2461)" + +#: ../../source/user_guide/release_notes.md:800 +msgid "" +"`enable_shared_expert_dp` in `additional_config` is disabled by default " +"now, and it is recommended to be enabled when inferencing with deepseek. 
" +"[#2457](https://github.com/vllm-project/vllm-ascend/pull/2457)" +msgstr "" +"`additional_config` 中的 `enable_shared_expert_dp` 参数现在默认禁用,建议在使用 deepseek 进行推理时启用它。[#2457](https://github.com/vllm-project/vllm-" +"ascend/pull/2457)" + +#: ../../source/user_guide/release_notes.md:804 +msgid "" +"Sliding window attention not support chunked prefill currently, thus we " +"could only enable AscendScheduler to run with it. " +"[#2729](https://github.com/vllm-project/vllm-ascend/issues/2729)" +msgstr "" +"滑动窗口注意力机制目前不支持分块预填充,因此我们只能启用 AscendScheduler 来运行它。[#2729](https://github.com/vllm-project/vllm-" +"ascend/issues/2729)" + +#: ../../source/user_guide/release_notes.md:805 +msgid "" +"There is a bug with creating mc2_mask when MultiStream is enabled, will " +"fix it in next release. [#2681](https://github.com/vllm-project/vllm-" +"ascend/pull/2681)" +msgstr "" +"启用 MultiStream 时,创建 mc2_mask 存在一个漏洞,将在下一个版本中修复。[#2681](https://github.com/vllm-project/vllm-" +"ascend/pull/2681)" + +#: ../../source/user_guide/release_notes.md:807 +msgid "v0.9.1 - 2025.09.03" +msgstr "v0.9.1 - 2025.09.03" + +#: ../../source/user_guide/release_notes.md:809 +msgid "" +"We are excited to announce the newest official release of vLLM Ascend. " +"This release includes many feature supports, performance improvements and" +" bug fixes. We recommend users to upgrade from 0.7.3 to this version. " +"Please always set `VLLM_USE_V1=1` to use V1 engine." +msgstr "" +"我们很高兴地宣布 vLLM Ascend 的最新正式版本。此版本包含了许多功能支持、性能改进和漏洞修复。我们建议用户从 0.7.3 版本升级至此版本。请始终设置 `VLLM_USE_V1=1` 以使用 V1 引擎。" + +#: ../../source/user_guide/release_notes.md:811 +msgid "" +"In this release, we added many enhancements for large scale expert " +"parallel case. It's recommended to follow the [official " +"guide](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)." +msgstr "" +"在此版本中,我们为大规模专家并行场景添加了许多增强功能。建议遵循[官方指南](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)。" + +#: ../../source/user_guide/release_notes.md:813 +msgid "" +"Please note that this release note will list all the important changes " +"from last official release(v0.7.3)" +msgstr "" +"请注意,本发布说明将列出自上一个正式版本(v0.7.3)以来的所有重要变更。" + +#: ../../source/user_guide/release_notes.md:817 +msgid "" +"DeepSeek V3/R1 is supported with high quality and performance. MTP can " +"work with DeepSeek as well. Please refer to [multi node " +"tutorials](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/multi_node.html)" +" and [Large Scale Expert Parallelism](https://github.com/vllm-project" +"/vllm-ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)." +msgstr "" +"高质量、高性能地支持了 DeepSeek V3/R1。MTP 也能与 DeepSeek 协同工作。请参阅[多节点教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/multi_node.html)和[大规模专家并行](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)。" + +#: ../../source/user_guide/release_notes.md:818 +msgid "" +"Qwen series models work with graph mode now. It works by default with V1 " +"Engine. Please refer to [Qwen " +"tutorials](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/index.html)." +msgstr "" +"Qwen 系列模型现在支持图模式。默认情况下,它与 V1 引擎配合工作。请参阅[Qwen 教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/index.html)。" + +#: ../../source/user_guide/release_notes.md:819 +msgid "" +"Disaggregated Prefilling support for V1 Engine. 
Please refer to [Large " +"Scale Expert Parallelism](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md) tutorials." +msgstr "" +"V1 引擎支持解耦式预填充。请参阅[大规模专家并行](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)教程。" + +#: ../../source/user_guide/release_notes.md:820 +msgid "Automatic prefix caching and chunked prefill feature is supported." +msgstr "支持自动前缀缓存和分块预填充功能。" + +#: ../../source/user_guide/release_notes.md:821 +msgid "Speculative decoding feature works with Ngram and MTP method." +msgstr "推测式解码功能支持 Ngram 和 MTP 方法。" + +#: ../../source/user_guide/release_notes.md:822 +msgid "" +"MOE and dense w4a8 quantization support now. Please refer to " +"[quantization " +"guide](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/quantization.html)." +msgstr "" +"现已支持 MOE 和稠密模型的 w4a8 量化。请参阅[量化指南](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/quantization.html)。" + +#: ../../source/user_guide/release_notes.md:823 +msgid "" +"Sleep Mode feature is supported for V1 engine. Please refer to [Sleep " +"mode " +"tutorials](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/sleep_mode.html)." +msgstr "" +"V1 引擎现已支持睡眠模式功能。请参阅[睡眠模式教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/sleep_mode.html)。" + +#: ../../source/user_guide/release_notes.md:824 +msgid "" +"Dynamic and Static EPLB support is added. This feature is still " +"experimental." +msgstr "" +"已添加动态和静态 EPLB 支持。此功能仍处于实验阶段。" + +#: ../../source/user_guide/release_notes.md:826 +msgid "Note" +msgstr "注意" + +#: ../../source/user_guide/release_notes.md:828 +msgid "" +"The following notes are especially for reference when upgrading from last" +" final release (v0.7.3):" +msgstr "" +"以下说明特别适用于从上一个正式版本 (v0.7.3) 升级时参考:" + +#: ../../source/user_guide/release_notes.md:830 +msgid "" +"V0 Engine is not supported from this release. Please always set " +"`VLLM_USE_V1=1` to use V1 engine with vLLM Ascend." +msgstr "" +"从本版本起不再支持 V0 引擎。请始终设置 `VLLM_USE_V1=1` 以在 vLLM Ascend 中使用 V1 引擎。" + +#: ../../source/user_guide/release_notes.md:831 +msgid "" +"Mindie Turbo is not needed with this release. And the old version of " +"Mindie Turbo is not compatible. Please do not install it. Currently all " +"the function and enhancement is included in vLLM Ascend already. We'll " +"consider to add it back in the future in needed." +msgstr "" +"本版本不再需要 Mindie Turbo。旧版本的 Mindie Turbo 不兼容,请勿安装。目前所有功能和增强已包含在 vLLM Ascend 中。未来如有需要,我们会考虑重新添加。" + +#: ../../source/user_guide/release_notes.md:832 +msgid "" +"Torch-npu is upgraded to 2.5.1.post1. CANN is upgraded to 8.2.RC1. Don't " +"forget to upgrade them." +msgstr "" +"Torch-npu 已升级至 2.5.1.post1。CANN 已升级至 8.2.RC1。请勿忘记升级。" + +#: ../../source/user_guide/release_notes.md:836 +msgid "" +"The Ascend scheduler is added for V1 engine. This scheduler is more " +"affine with Ascend hardware." +msgstr "" +"为 V1 引擎新增了 Ascend 调度器。该调度器与 Ascend 硬件更加适配。" + +#: ../../source/user_guide/release_notes.md:837 +msgid "Structured output feature works now on V1 Engine." +msgstr "结构化输出功能现已在 V1 引擎上可用。" + +#: ../../source/user_guide/release_notes.md:838 +msgid "A batch of custom ops are added to improve the performance." +msgstr "添加了一批自定义算子以提升性能。" + +#: ../../source/user_guide/release_notes.md:840 +msgid "Changes" +msgstr "变更" + +#: ../../source/user_guide/release_notes.md:842 +msgid "" +"EPLB support for Qwen3-moe model. 
[#2000](https://github.com/vllm-project" +"/vllm-ascend/pull/2000)" +msgstr "" +"为 Qwen3-moe 模型添加 EPLB 支持。[#2000](https://github.com/vllm-project/vllm-ascend/pull/2000)" + +#: ../../source/user_guide/release_notes.md:843 +msgid "" +"Fix the bug that MTP doesn't work well with Prefill Decode " +"Disaggregation. [#2610](https://github.com/vllm-project/vllm-" +"ascend/pull/2610) [#2554](https://github.com/vllm-project/vllm-" +"ascend/pull/2554) [#2531](https://github.com/vllm-project/vllm-" +"ascend/pull/2531)" +msgstr "" +"修复了 MTP 与 Prefill Decode Disaggregation 配合不佳的问题。[#2610](https://github.com/vllm-project/vllm-ascend/pull/2610) [#2554](https://github.com/vllm-project/vllm-ascend/pull/2554) [#2531](https://github.com/vllm-project/vllm-ascend/pull/2531)" + +#: ../../source/user_guide/release_notes.md:844 +msgid "" +"Fix few bugs to make sure Prefill Decode Disaggregation works well. " +"[#2538](https://github.com/vllm-project/vllm-ascend/pull/2538) " +"[#2509](https://github.com/vllm-project/vllm-ascend/pull/2509) " +"[#2502](https://github.com/vllm-project/vllm-ascend/pull/2502)" +msgstr "" +"修复了一些 bug 以确保 Prefill Decode Disaggregation 正常工作。[#2538](https://github.com/vllm-project/vllm-ascend/pull/2538) [#2509](https://github.com/vllm-project/vllm-ascend/pull/2509) [#2502](https://github.com/vllm-project/vllm-ascend/pull/2502)" + +#: ../../source/user_guide/release_notes.md:845 +msgid "" +"Fix file not found error with shutil.rmtree in torchair mode. " +"[#2506](https://github.com/vllm-project/vllm-ascend/pull/2506)" +msgstr "" +"修复了在 torchair 模式下使用 shutil.rmtree 时出现的文件未找到错误。[#2506](https://github.com/vllm-project/vllm-ascend/pull/2506)" + +#: ../../source/user_guide/release_notes.md:849 +msgid "" +"When running MoE model, Aclgraph mode only work with tensor parallel. " +"DP/EP doesn't work in this release." +msgstr "" +"运行 MoE 模型时,Aclgraph 模式仅支持张量并行。DP/EP 在本版本中不可用。" + +#: ../../source/user_guide/release_notes.md:850 +msgid "Pipeline parallelism is not supported in this release for V1 engine." +msgstr "本版本中 V1 引擎不支持流水线并行。" + +#: ../../source/user_guide/release_notes.md:851 +msgid "" +"If you use w4a8 quantization with eager mode, please set " +"`VLLM_ASCEND_MLA_PARALLEL=1` to avoid oom error." +msgstr "" +"如果在 eager 模式下使用 w4a8 量化,请设置 `VLLM_ASCEND_MLA_PARALLEL=1` 以避免内存不足错误。" + +#: ../../source/user_guide/release_notes.md:852 +msgid "" +"Accuracy test with some tools may not be correct. It doesn't affect the " +"real user case. We'll fix it in the next post release. " +"[#2654](https://github.com/vllm-project/vllm-ascend/pull/2654)" +msgstr "" +"使用某些工具进行的精度测试可能不准确。这不影响实际用户场景。我们将在下一个发布版本中修复此问题。[#2654](https://github.com/vllm-project/vllm-ascend/pull/2654)" + +#: ../../source/user_guide/release_notes.md:853 +msgid "" +"We notice that there are still some problems when running vLLM Ascend " +"with Prefill Decode Disaggregation. For example, the memory may be leaked" +" and the service may be stuck. It's caused by known issue by vLLM and " +"vLLM Ascend. We'll fix it in the next post release. 
" +"[#2650](https://github.com/vllm-project/vllm-ascend/pull/2650) " +"[#2604](https://github.com/vllm-project/vllm-ascend/pull/2604) " +"[vLLM#22736](https://github.com/vllm-project/vllm/pull/22736) " +"[vLLM#23554](https://github.com/vllm-project/vllm/pull/23554) " +"[vLLM#23981](https://github.com/vllm-project/vllm/pull/23981)" +msgstr "" +"我们注意到,在使用 Prefill Decode Disaggregation 运行 vLLM Ascend 时仍存在一些问题。例如,可能出现内存泄漏或服务卡住。这是由 vLLM 和 vLLM Ascend 的已知问题引起的。我们将在下一个发布版本中修复此问题。[#2650](https://github.com/vllm-project/vllm-ascend/pull/2650) [#2604](https://github.com/vllm-project/vllm-ascend/pull/2604) [vLLM#22736](https://github.com/vllm-project/vllm/pull/22736) [vLLM#23554](https://github.com/vllm-project/vllm/pull/23554) [vLLM#23981](https://github.com/vllm-project/vllm/pull/23981)" + +#: ../../source/user_guide/release_notes.md:855 +msgid "v0.9.1rc3 - 2025.08.22" +msgstr "v0.9.1rc3 - 2025.08.22" + +#: ../../source/user_guide/release_notes.md:857 +msgid "" +"This is the 3rd release candidate of v0.9.1 for vLLM Ascend. Please " +"follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.9.1/) to get started." +msgstr "" +"这是 vLLM Ascend v0.9.1 的第三个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.9.1/)开始使用。" + +#: ../../source/user_guide/release_notes.md:861 +msgid "" +"MTP supports V1 scheduler [#2371](https://github.com/vllm-project/vllm-" +"ascend/pull/2371)" +msgstr "" +"MTP 支持 V1 调度器 [#2371](https://github.com/vllm-project/vllm-ascend/pull/2371)" + +#: ../../source/user_guide/release_notes.md:862 +msgid "" +"Add LMhead TP communication groups [#1956](https://github.com/vllm-" +"project/vllm-ascend/pull/1956)" +msgstr "" +"添加 LMhead 张量并行通信组 [#1956](https://github.com/vllm-project/vllm-ascend/pull/1956)" + +#: ../../source/user_guide/release_notes.md:863 +msgid "" +"Fix the bug that qwen3 moe doesn't work with aclgraph " +"[#2478](https://github.com/vllm-project/vllm-ascend/pull/2478)" +msgstr "" +"修复了 qwen3 moe 模型在 aclgraph 模式下无法工作的 bug [#2478](https://github.com/vllm-project/vllm-ascend/pull/2478)" + +#: ../../source/user_guide/release_notes.md:864 +msgid "" +"Fix `grammar_bitmask` IndexError caused by outdated " +"`apply_grammar_bitmask` method [#2314](https://github.com/vllm-project" +"/vllm-ascend/pull/2314)" +msgstr "" +"修复了因过时的 `apply_grammar_bitmask` 方法导致的 `grammar_bitmask` IndexError [#2314](https://github.com/vllm-project/vllm-ascend/pull/2314)" + +#: ../../source/user_guide/release_notes.md:865 +msgid "" +"Remove `chunked_prefill_for_mla` [#2177](https://github.com/vllm-project" +"/vllm-ascend/pull/2177)" +msgstr "" +"移除 `chunked_prefill_for_mla` [#2177](https://github.com/vllm-project/vllm-ascend/pull/2177)" + +#: ../../source/user_guide/release_notes.md:866 +msgid "" +"Fix bugs and refactor cached mask generation logic " +"[#2326](https://github.com/vllm-project/vllm-ascend/pull/2326)" +msgstr "" +"修复 bug 并重构缓存掩码生成逻辑 [#2326](https://github.com/vllm-project/vllm-ascend/pull/2326)" + +#: ../../source/user_guide/release_notes.md:867 +msgid "" +"Fix configuration check logic about ascend scheduler " +"[#2327](https://github.com/vllm-project/vllm-ascend/pull/2327)" +msgstr "" +"修复关于 Ascend 调度器的配置检查逻辑 [#2327](https://github.com/vllm-project/vllm-ascend/pull/2327)" + +#: ../../source/user_guide/release_notes.md:868 +msgid "" +"Cancel the verification between deepseek-mtp and non-ascend scheduler in " +"disaggregated-prefill deployment [#2368](https://github.com/vllm-project" +"/vllm-ascend/pull/2368)" +msgstr "" +"在 disaggregated-prefill 部署中取消 deepseek-mtp 与非 
Ascend 调度器之间的验证 [#2368](https://github.com/vllm-project/vllm-ascend/pull/2368)" + +#: ../../source/user_guide/release_notes.md:869 +msgid "" +"Fix issue that failed with ray distributed backend " +"[#2306](https://github.com/vllm-project/vllm-ascend/pull/2306)" +msgstr "" +"修复了使用 ray 分布式后端时失败的问题 [#2306](https://github.com/vllm-project/vllm-ascend/pull/2306)" + +#: ../../source/user_guide/release_notes.md:870 +msgid "" +"Fix incorrect req block length in ascend scheduler " +"[#2394](https://github.com/vllm-project/vllm-ascend/pull/2394)" +msgstr "" +"修复 Ascend 调度器中请求块长度不正确的问题 [#2394](https://github.com/vllm-project/vllm-ascend/pull/2394)" + +#: ../../source/user_guide/release_notes.md:871 +msgid "" +"Fix header include issue in rope [#2398](https://github.com/vllm-project" +"/vllm-ascend/pull/2398)" +msgstr "" +"修复 rope 中的头文件包含问题 [#2398](https://github.com/vllm-project/vllm-ascend/pull/2398)" + +#: ../../source/user_guide/release_notes.md:872 +msgid "" +"Fix mtp config bug [#2412](https://github.com/vllm-project/vllm-" +"ascend/pull/2412)" +msgstr "" +"修复 mtp 配置 bug [#2412](https://github.com/vllm-project/vllm-ascend/pull/2412)" + +#: ../../source/user_guide/release_notes.md:873 +msgid "" +"Fix error info and adapt `attn_metadata` refactor " +"[#2402](https://github.com/vllm-project/vllm-ascend/pull/2402)" +msgstr "" +"修复错误信息并适配 `attn_metadata` 重构 [#2402](https://github.com/vllm-project/vllm-ascend/pull/2402)" + +#: ../../source/user_guide/release_notes.md:874 +msgid "" +"Fix torchair runtime error caused by configuration mismatches and " +"`.kv_cache_bytes` file missing [#2312](https://github.com/vllm-project" +"/vllm-ascend/pull/2312)" +msgstr "" +"修复了因配置不匹配和缺少 `.kv_cache_bytes` 文件导致的 torchair 运行时错误 [#2312](https://github.com/vllm-project/vllm-ascend/pull/2312)" + +#: ../../source/user_guide/release_notes.md:875 +msgid "" +"Move `with_prefill` allreduce from cpu to npu [#2230](https://github.com" +"/vllm-project/vllm-ascend/pull/2230)" +msgstr "" +"将 `with_prefill` 的 allreduce 操作从 CPU 移至 NPU [#2230](https://github.com/vllm-project/vllm-ascend/pull/2230)" + +#: ../../source/user_guide/release_notes.md:877 +#: ../../source/user_guide/release_notes.md:1041 +#: ../../source/user_guide/release_notes.md:1223 +msgid "Docs" +msgstr "文档" + +#: ../../source/user_guide/release_notes.md:879 +msgid "" +"Add document for deepseek large EP [#2339](https://github.com/vllm-" +"project/vllm-ascend/pull/2339)" +msgstr "" +"添加 deepseek 大模型 EP 相关文档 [#2339](https://github.com/vllm-project/vllm-ascend/pull/2339)" + +#: ../../source/user_guide/release_notes.md:883 +msgid "" +"`test_aclgraph.py` failed with `\"full_cuda_graph\": True` on A2 (910B1) " +"[#2182](https://github.com/vllm-project/vllm-ascend/issues/2182)" +msgstr "" +"在 A2 (910B1) 上,当 `\"full_cuda_graph\": True` 时,`test_aclgraph.py` 测试失败 [#2182](https://github.com/vllm-project/vllm-ascend/issues/2182)" + +#: ../../source/user_guide/release_notes.md:885 +msgid "v0.10.0rc1 - 2025.08.07" +msgstr "v0.10.0rc1 - 2025.08.07" + +#: ../../source/user_guide/release_notes.md:887 +msgid "" +"This is the 1st release candidate of v0.10.0 for vLLM Ascend. Please " +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.10.0rc1) to get started. V0 is completely removed from " +"this version." +msgstr "" +"这是 vLLM Ascend v0.10.0 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.10.0rc1)开始使用。V0 引擎已在此版本中完全移除。" + +#: ../../source/user_guide/release_notes.md:891 +msgid "" +"Disaggregate prefill works with V1 engine now. 
You can take a try with " +"DeepSeek model [#950](https://github.com/vllm-project/vllm-" +"ascend/pull/950), following this [tutorial](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/v0.10.0rc1/examples/disaggregated_prefill_v1/README.md)." +msgstr "" +"解耦预填充现在可与 V1 引擎协同工作。您可以尝试使用 DeepSeek 模型 [#950](https://github.com/vllm-project/vllm-" +"ascend/pull/950),并按照此[教程](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.10.0rc1/examples/disaggregated_prefill_v1/README.md)操作。" + +#: ../../source/user_guide/release_notes.md:892 +msgid "" +"W4A8 quantization method is supported for dense and MoE model now. " +"[#2060](https://github.com/vllm-project/vllm-ascend/pull/2060) " +"[#2172](https://github.com/vllm-project/vllm-ascend/pull/2172)" +msgstr "" +"现在已支持对稠密模型和 MoE 模型使用 W4A8 量化方法。 [#2060](https://github.com/vllm-project/vllm-ascend/pull/2060) " +"[#2172](https://github.com/vllm-project/vllm-ascend/pull/2172)" + +#: ../../source/user_guide/release_notes.md:896 +msgid "" +"Ascend PyTorch adapter (torch_npu) has been upgraded to " +"`2.7.1.dev20250724`. [#1562](https://github.com/vllm-project/vllm-" +"ascend/pull/1562) And CANN hase been upgraded to `8.2.RC1`. " +"[#1653](https://github.com/vllm-project/vllm-ascend/pull/1653) Don’t " +"forget to update them in your environment or using the latest images." +msgstr "" +"Ascend PyTorch 适配器 (torch_npu) 已升级至 `2.7.1.dev20250724`。 [#1562](https://github.com/vllm-project/vllm-" +"ascend/pull/1562) 同时 CANN 已升级至 `8.2.RC1`。 [#1653](https://github.com/vllm-project/vllm-ascend/pull/1653) " +"请勿忘记在您的环境中更新它们或使用最新的镜像。" + +#: ../../source/user_guide/release_notes.md:897 +msgid "" +"vLLM Ascend works on Atlas 800I A3 now, and the image on A3 will be " +"released from this version on. [#1582](https://github.com/vllm-project" +"/vllm-ascend/pull/1582)" +msgstr "" +"vLLM Ascend 现已在 Atlas 800I A3 上运行,A3 的镜像将从此版本开始发布。 [#1582](https://github.com/vllm-project" +"/vllm-ascend/pull/1582)" + +#: ../../source/user_guide/release_notes.md:898 +msgid "" +"Kimi-K2 with w8a8 quantization, Qwen3-Coder and GLM-4.5 is supported in " +"vLLM Ascend, please following this [tutorial](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/v0.10.0rc1/docs/source/tutorials/multi_node_kimi.md) to have " +"a try. [#2162](https://github.com/vllm-project/vllm-ascend/pull/2162)" +msgstr "" +"vLLM Ascend 现已支持 Kimi-K2(w8a8 量化)、Qwen3-Coder 和 GLM-4.5 模型,请按照此[教程](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/v0.10.0rc1/docs/source/tutorials/multi_node_kimi.md)进行尝试。 [#2162](https://github.com/vllm-project/vllm-ascend/pull/2162)" + +#: ../../source/user_guide/release_notes.md:899 +msgid "" +"Pipeline Parallelism is supported in V1 now. [#1800](https://github.com" +"/vllm-project/vllm-ascend/pull/1800)" +msgstr "" +"V1 引擎现已支持流水线并行。 [#1800](https://github.com/vllm-project/vllm-ascend/pull/1800)" + +#: ../../source/user_guide/release_notes.md:900 +msgid "" +"Prefix cache feature now work with the Ascend Scheduler. " +"[#1446](https://github.com/vllm-project/vllm-ascend/pull/1446)" +msgstr "" +"前缀缓存功能现在可与 Ascend 调度器协同工作。 [#1446](https://github.com/vllm-project/vllm-ascend/pull/1446)" + +#: ../../source/user_guide/release_notes.md:901 +msgid "" +"Torchair graph mode works with tp > 4 now. 
[#1508](https://github.com" +"/vllm-project/vllm-ascend/issues/1508)" +msgstr "" +"Torchair 图模式现在支持 tp > 4 的情况。 [#1508](https://github.com/vllm-project/vllm-ascend/issues/1508)" + +#: ../../source/user_guide/release_notes.md:902 +msgid "" +"MTP support torchair graph mode now [#2145](https://github.com/vllm-" +"project/vllm-ascend/pull/2145)" +msgstr "" +"MTP 现在支持 torchair 图模式 [#2145](https://github.com/vllm-project/vllm-ascend/pull/2145)" + +#: ../../source/user_guide/release_notes.md:907 +msgid "" +"Fix functional problem of multimodality models like Qwen2-audio with " +"Aclgraph. [#1803](https://github.com/vllm-project/vllm-ascend/pull/1803)" +msgstr "" +"修复了 Qwen2-audio 等多模态模型与 Aclgraph 配合使用时的功能问题。 [#1803](https://github.com/vllm-project/vllm-ascend/pull/1803)" + +#: ../../source/user_guide/release_notes.md:908 +msgid "" +"Fix the process group creating error with external launch scenario. " +"[#1681](https://github.com/vllm-project/vllm-ascend/pull/1681)" +msgstr "" +"修复了在外部启动场景下创建进程组的错误。 [#1681](https://github.com/vllm-project/vllm-ascend/pull/1681)" + +#: ../../source/user_guide/release_notes.md:909 +msgid "" +"Fix the functional problem with guided decoding. " +"[#2022](https://github.com/vllm-project/vllm-ascend/pull/2022)" +msgstr "" +"修复了引导式解码的功能问题。 [#2022](https://github.com/vllm-project/vllm-ascend/pull/2022)" + +#: ../../source/user_guide/release_notes.md:910 +msgid "" +"Fix the accuracy issue with common MoE models in DP scenario. " +"[#1856](https://github.com/vllm-project/vllm-ascend/pull/1856)" +msgstr "" +"修复了在 DP 场景下常见 MoE 模型的准确性问题。 [#1856](https://github.com/vllm-project/vllm-ascend/pull/1856)" + +#: ../../source/user_guide/release_notes.md:912 +msgid "" +"Caching sin/cos instead of calculate it every layer. " +"[#1890](https://github.com/vllm-project/vllm-ascend/pull/1890)" +msgstr "" +"缓存 sin/cos 值,而不是在每一层都重新计算。 [#1890](https://github.com/vllm-project/vllm-ascend/pull/1890)" + +#: ../../source/user_guide/release_notes.md:913 +msgid "" +"Improve shared expert multi-stream parallelism [#1891](https://github.com" +"/vllm-project/vllm-ascend/pull/1891)" +msgstr "" +"改进了共享专家的多流并行 [#1891](https://github.com/vllm-project/vllm-ascend/pull/1891)" + +#: ../../source/user_guide/release_notes.md:914 +msgid "" +"Implement the fusion of allreduce and matmul in prefill phase when tp is " +"enabled. Enable this feature by setting " +"`VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE` to `1`. [#1926](https://github.com" +"/vllm-project/vllm-ascend/pull/1926)" +msgstr "" +"在启用张量并行时,实现了预填充阶段 allreduce 与 matmul 的融合。通过设置 `VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE` 为 `1` 来启用此功能。 [#1926](https://github.com/vllm-project/vllm-ascend/pull/1926)" + +#: ../../source/user_guide/release_notes.md:915 +msgid "" +"Optimize Quantized MoE Performance by Reducing All2All Communication. 
" +"[#2195](https://github.com/vllm-project/vllm-ascend/pull/2195)" +msgstr "" +"通过减少 All2All 通信来优化量化 MoE 的性能。 [#2195](https://github.com/vllm-project/vllm-ascend/pull/2195)" + +#: ../../source/user_guide/release_notes.md:916 +msgid "" +"Use AddRmsNormQuant ops in the custom model to optimize Qwen3's " +"performance [#1806](https://github.com/vllm-project/vllm-" +"ascend/pull/1806)" +msgstr "" +"在自定义模型中使用 AddRmsNormQuant 算子以优化 Qwen3 的性能 [#1806](https://github.com/vllm-project/vllm-" +"ascend/pull/1806)" + +#: ../../source/user_guide/release_notes.md:917 +msgid "" +"Use multicast to avoid padding decode request to prefill size " +"[#1555](https://github.com/vllm-project/vllm-ascend/pull/1555)" +msgstr "" +"使用组播来避免将解码请求填充至预填充大小 [#1555](https://github.com/vllm-project/vllm-ascend/pull/1555)" + +#: ../../source/user_guide/release_notes.md:918 +msgid "" +"The performance of LoRA has been improved. [#1884](https://github.com" +"/vllm-project/vllm-ascend/pull/1884)" +msgstr "" +"LoRA 的性能已得到提升。 [#1884](https://github.com/vllm-project/vllm-ascend/pull/1884)" + +#: ../../source/user_guide/release_notes.md:919 +msgid "A batch of refactoring prs to enhance the code architecture:" +msgstr "一系列重构 PR 以增强代码架构:" + +#: ../../source/user_guide/release_notes.md:920 +msgid "" +"Torchair model runner refactor [#2205](https://github.com/vllm-project" +"/vllm-ascend/pull/2205)" +msgstr "" +"重构 Torchair 模型运行器 [#2205](https://github.com/vllm-project/vllm-ascend/pull/2205)" + +#: ../../source/user_guide/release_notes.md:921 +msgid "" +"Refactoring forward_context and model_runner_v1. " +"[#1979](https://github.com/vllm-project/vllm-ascend/pull/1979)" +msgstr "" +"重构 forward_context 和 model_runner_v1。 [#1979](https://github.com/vllm-project/vllm-ascend/pull/1979)" + +#: ../../source/user_guide/release_notes.md:922 +msgid "" +"Refactor AscendMetaData Comments. [#1967](https://github.com/vllm-project" +"/vllm-ascend/pull/1967)" +msgstr "" +"重构 AscendMetaData 注释。 [#1967](https://github.com/vllm-project/vllm-ascend/pull/1967)" + +#: ../../source/user_guide/release_notes.md:923 +msgid "" +"Refactor torchair utils. [#1892](https://github.com/vllm-project/vllm-" +"ascend/pull/1892)" +msgstr "" +"重构 torchair 工具集。 [#1892](https://github.com/vllm-project/vllm-ascend/pull/1892)" + +#: ../../source/user_guide/release_notes.md:924 +msgid "" +"Refactor torchair worker. [#1885](https://github.com/vllm-project/vllm-" +"ascend/pull/1885)" +msgstr "" +"重构 torchair worker。 [#1885](https://github.com/vllm-project/vllm-ascend/pull/1885)" + +#: ../../source/user_guide/release_notes.md:925 +msgid "" +"Register activation customop instead of overwrite forward_oot. " +"[#1841](https://github.com/vllm-project/vllm-ascend/pull/1841)" +msgstr "" +"注册激活自定义算子,而非覆盖 forward_oot。 [#1841](https://github.com/vllm-project/vllm-ascend/pull/1841)" + +#: ../../source/user_guide/release_notes.md:927 +msgid "" +"`expert_tensor_parallel_size` in `additional_config` is removed now, and " +"the EP and TP is aligned with vLLM now. [#1681](https://github.com/vllm-" +"project/vllm-ascend/pull/1681)" +msgstr "" +"`additional_config` 中的 `expert_tensor_parallel_size` 现已被移除,EP 和 TP 现已与 vLLM 对齐。 [#1681](https://github.com/vllm-project/vllm-ascend/pull/1681)" + +#: ../../source/user_guide/release_notes.md:928 +msgid "" +"Add `VLLM_ASCEND_MLA_PA` in environ variables, use this to enable mla " +"paged attention operator for deepseek mla decode." 
+msgstr "" +"在环境变量中添加 `VLLM_ASCEND_MLA_PA`,用于启用 deepseek mla 解码的 mla 分页注意力算子。" + +#: ../../source/user_guide/release_notes.md:929 +msgid "" +"Add `VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE` in environ variables, enable " +"`MatmulAllReduce` fusion kernel when tensor parallel is enabled. This " +"feature is supported in A2, and eager mode will get better performance." +msgstr "" +"在环境变量中添加 `VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE`,用于在启用张量并行时启用 `MatmulAllReduce` 融合内核。此功能在 A2 上受支持,且 eager 模式将获得更好的性能。" + +#: ../../source/user_guide/release_notes.md:930 +msgid "" +"Add `VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ` in environ variables, Whether to" +" enable moe all2all seq, this provides a basic framework on the basis of " +"alltoall for easy expansion." +msgstr "" +"在环境变量中添加 `VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ`,用于决定是否启用 moe all2all seq,这为在 alltoall 基础上进行扩展提供了一个基础框架。" + +#: ../../source/user_guide/release_notes.md:932 +msgid "" +"UT coverage reached 76.34% after a batch of prs followed by this rfc: " +"[#1298](https://github.com/vllm-project/vllm-ascend/issues/1298)" +msgstr "" +"在遵循此 RFC [#1298](https://github.com/vllm-project/vllm-ascend/issues/1298) 提交一系列 PR 后,单元测试覆盖率已达到 76.34%。" + +#: ../../source/user_guide/release_notes.md:933 +msgid "" +"Sequence Parallelism works for Qwen3 MoE. [#2209](https://github.com" +"/vllm-project/vllm-ascend/issues/2209)" +msgstr "" +"序列并行现可用于 Qwen3 MoE。 [#2209](https://github.com/vllm-project/vllm-ascend/issues/2209)" + +#: ../../source/user_guide/release_notes.md:934 +msgid "" +"Chinese online document is added now. [#1870](https://github.com/vllm-" +"project/vllm-ascend/issues/1870)" +msgstr "" +"现已添加中文在线文档。 [#1870](https://github.com/vllm-project/vllm-ascend/issues/1870)" + +#: ../../source/user_guide/release_notes.md:938 +msgid "" +"Aclgraph could not work with DP + EP currently, the mainly gap is the " +"number of npu stream that Aclgraph needed to capture graph is not enough." +" [#2229](https://github.com/vllm-project/vllm-ascend/issues/2229)" +msgstr "" +"Aclgraph 目前无法与 DP + EP 协同工作,主要原因是 Aclgraph 捕获图所需的 NPU 流数量不足。 [#2229](https://github.com/vllm-project/vllm-ascend/issues/2229)" + +#: ../../source/user_guide/release_notes.md:939 +msgid "" +"There is an accuracy issue on W8A8 dynamic quantized DeepSeek with " +"multistream enabled. This will be fixed in the next release. " +"[#2232](https://github.com/vllm-project/vllm-ascend/issues/2232)" +msgstr "" +"启用多流时,W8A8 动态量化的 DeepSeek 模型存在准确性问题。此问题将在下一个版本中修复。 [#2232](https://github.com/vllm-project/vllm-ascend/issues/2232)" + +#: ../../source/user_guide/release_notes.md:940 +msgid "" +"In Qwen3 MoE, SP cannot be incorporated into the Aclgraph. " +"[#2246](https://github.com/vllm-project/vllm-ascend/issues/2246)" +msgstr "" +"在 Qwen3 MoE 中,序列并行无法集成到 Aclgraph 中。 [#2246](https://github.com/vllm-project/vllm-ascend/issues/2246)" + +#: ../../source/user_guide/release_notes.md:941 +msgid "" +"MTP not support V1 scheduler currently, will fix it in Q3. " +"[#2254](https://github.com/vllm-project/vllm-ascend/issues/2254)" +msgstr "" +"MTP 目前不支持 V1 调度器,将在第三季度修复此问题。 [#2254](https://github.com/vllm-project/vllm-ascend/issues/2254)" + +#: ../../source/user_guide/release_notes.md:942 +msgid "" +"When running MTP with DP > 1, we need to disable metrics logger due to " +"some issue on vLLM. 
[#2254](https://github.com/vllm-project/vllm-" +"ascend/issues/2254)" +msgstr "" +"当以 DP > 1 运行 MTP 时,由于 vLLM 的某些问题,需要禁用指标记录器。 [#2254](https://github.com/vllm-project/vllm-" +"ascend/issues/2254)" + +#: ../../source/user_guide/release_notes.md:944 +msgid "v0.9.1rc2 - 2025.08.04" +msgstr "v0.9.1rc2 - 2025年08月04日" + +#: ../../source/user_guide/release_notes.md:946 +msgid "" +"This is the 2nd release candidate of v0.9.1 for vLLM Ascend. Please " +"follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.9.1/) to get started." +msgstr "" +"这是 vLLM Ascend v0.9.1 的第二个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.9.1/)开始使用。" + +#: ../../source/user_guide/release_notes.md:950 +msgid "" +"MOE and dense w4a8 quantization support now: [#1320](https://github.com" +"/vllm-project/vllm-ascend/pull/1320) [#1910](https://github.com/vllm-" +"project/vllm-ascend/pull/1910) [#1275](https://github.com/vllm-project" +"/vllm-ascend/pull/1275) [#1480](https://github.com/vllm-project/vllm-" +"ascend/pull/1480)" +msgstr "" +"现已支持 MOE 和密集模型的 w4a8 量化:[#1320](https://github.com/vllm-project/vllm-ascend/pull/1320) [#1910](https://github.com/vllm-project/vllm-ascend/pull/1910) [#1275](https://github.com/vllm-project/vllm-ascend/pull/1275) [#1480](https://github.com/vllm-project/vllm-ascend/pull/1480)" + +#: ../../source/user_guide/release_notes.md:951 +msgid "" +"Dynamic EPLB support in [#1943](https://github.com/vllm-project/vllm-" +"ascend/pull/1943)" +msgstr "" +"在 [#1943](https://github.com/vllm-project/vllm-ascend/pull/1943) 中支持了动态 EPLB。" + +#: ../../source/user_guide/release_notes.md:952 +msgid "" +"Disaggregated Prefilling support for V1 Engine and improvement, continued" +" development and stabilization of the disaggregated prefill feature, " +"including performance enhancements and bug fixes for single-machine " +"setups:[#1953](https://github.com/vllm-project/vllm-ascend/pull/1953) " +"[#1612](https://github.com/vllm-project/vllm-ascend/pull/1612) " +"[#1361](https://github.com/vllm-project/vllm-ascend/pull/1361) " +"[#1746](https://github.com/vllm-project/vllm-ascend/pull/1746) " +"[#1552](https://github.com/vllm-project/vllm-ascend/pull/1552) " +"[#1801](https://github.com/vllm-project/vllm-ascend/pull/1801) " +"[#2083](https://github.com/vllm-project/vllm-ascend/pull/2083) " +"[#1989](https://github.com/vllm-project/vllm-ascend/pull/1989)" +msgstr "" +"为 V1 引擎支持并改进了分离式预填充,持续开发和稳定该功能,包括针对单机设置的性能提升和错误修复:[#1953](https://github.com/vllm-project/vllm-ascend/pull/1953) [#1612](https://github.com/vllm-project/vllm-ascend/pull/1612) [#1361](https://github.com/vllm-project/vllm-ascend/pull/1361) [#1746](https://github.com/vllm-project/vllm-ascend/pull/1746) [#1552](https://github.com/vllm-project/vllm-ascend/pull/1552) [#1801](https://github.com/vllm-project/vllm-ascend/pull/1801) [#2083](https://github.com/vllm-project/vllm-ascend/pull/2083) [#1989](https://github.com/vllm-project/vllm-ascend/pull/1989)" + +#: ../../source/user_guide/release_notes.md:954 +msgid "Model Improvement" +msgstr "模型改进" + +#: ../../source/user_guide/release_notes.md:956 +msgid "" +"DeepSeek DeepSeek DBO support and improvement: [#1285](https://github.com" +"/vllm-project/vllm-ascend/pull/1285) [#1291](https://github.com/vllm-" +"project/vllm-ascend/pull/1291) [#1328](https://github.com/vllm-project" +"/vllm-ascend/pull/1328) [#1420](https://github.com/vllm-project/vllm-" +"ascend/pull/1420) [#1445](https://github.com/vllm-project/vllm-" +"ascend/pull/1445) [#1589](https://github.com/vllm-project/vllm-" 
+"ascend/pull/1589) [#1759](https://github.com/vllm-project/vllm-" +"ascend/pull/1759) [#1827](https://github.com/vllm-project/vllm-" +"ascend/pull/1827) [#2093](https://github.com/vllm-project/vllm-" +"ascend/pull/2093)" +msgstr "" +"DeepSeek DBO 支持与改进:[#1285](https://github.com/vllm-project/vllm-ascend/pull/1285) [#1291](https://github.com/vllm-project/vllm-ascend/pull/1291) [#1328](https://github.com/vllm-project/vllm-ascend/pull/1328) [#1420](https://github.com/vllm-project/vllm-ascend/pull/1420) [#1445](https://github.com/vllm-project/vllm-ascend/pull/1445) [#1589](https://github.com/vllm-project/vllm-ascend/pull/1589) [#1759](https://github.com/vllm-project/vllm-ascend/pull/1759) [#1827](https://github.com/vllm-project/vllm-ascend/pull/1827) [#2093](https://github.com/vllm-project/vllm-ascend/pull/2093)" + +#: ../../source/user_guide/release_notes.md:957 +msgid "" +"DeepSeek MTP improvement and bugfix: [#1214](https://github.com/vllm-" +"project/vllm-ascend/pull/1214) [#943](https://github.com/vllm-project" +"/vllm-ascend/pull/943) [#1584](https://github.com/vllm-project/vllm-" +"ascend/pull/1584) [#1473](https://github.com/vllm-project/vllm-" +"ascend/pull/1473) [#1294](https://github.com/vllm-project/vllm-" +"ascend/pull/1294) [#1632](https://github.com/vllm-project/vllm-" +"ascend/pull/1632) [#1694](https://github.com/vllm-project/vllm-" +"ascend/pull/1694) [#1840](https://github.com/vllm-project/vllm-" +"ascend/pull/1840) [#2076](https://github.com/vllm-project/vllm-" +"ascend/pull/2076) [#1990](https://github.com/vllm-project/vllm-" +"ascend/pull/1990) [#2019](https://github.com/vllm-project/vllm-" +"ascend/pull/2019)" +msgstr "" +"DeepSeek MTP 改进与错误修复:[#1214](https://github.com/vllm-project/vllm-ascend/pull/1214) [#943](https://github.com/vllm-project/vllm-ascend/pull/943) [#1584](https://github.com/vllm-project/vllm-ascend/pull/1584) [#1473](https://github.com/vllm-project/vllm-ascend/pull/1473) [#1294](https://github.com/vllm-project/vllm-ascend/pull/1294) [#1632](https://github.com/vllm-project/vllm-ascend/pull/1632) [#1694](https://github.com/vllm-project/vllm-ascend/pull/1694) [#1840](https://github.com/vllm-project/vllm-ascend/pull/1840) [#2076](https://github.com/vllm-project/vllm-ascend/pull/2076) [#1990](https://github.com/vllm-project/vllm-ascend/pull/1990) [#2019](https://github.com/vllm-project/vllm-ascend/pull/2019)" + +#: ../../source/user_guide/release_notes.md:958 +msgid "" +"Qwen3 MoE support improvement and bugfix around graph mode and DP: " +"[#1940](https://github.com/vllm-project/vllm-ascend/pull/1940) " +"[#2006](https://github.com/vllm-project/vllm-ascend/pull/2006) " +"[#1832](https://github.com/vllm-project/vllm-ascend/pull/1832)" +msgstr "" +"Qwen3 MoE 在图模式和 DP 方面的支持改进与错误修复:[#1940](https://github.com/vllm-project/vllm-ascend/pull/1940) [#2006](https://github.com/vllm-project/vllm-ascend/pull/2006) [#1832](https://github.com/vllm-project/vllm-ascend/pull/1832)" + +#: ../../source/user_guide/release_notes.md:959 +msgid "" +"Qwen3 performance improvement around rmsnorm/repo/mlp ops: " +"[#1545](https://github.com/vllm-project/vllm-ascend/pull/1545) " +"[#1719](https://github.com/vllm-project/vllm-ascend/pull/1719) " +"[#1726](https://github.com/vllm-project/vllm-ascend/pull/1726) " +"[#1782](https://github.com/vllm-project/vllm-ascend/pull/1782) " +"[#1745](https://github.com/vllm-project/vllm-ascend/pull/1745)" +msgstr "" +"Qwen3 在 rmsnorm/repo/mlp 算子方面的性能改进:[#1545](https://github.com/vllm-project/vllm-ascend/pull/1545) 
[#1719](https://github.com/vllm-project/vllm-ascend/pull/1719) [#1726](https://github.com/vllm-project/vllm-ascend/pull/1726) [#1782](https://github.com/vllm-project/vllm-ascend/pull/1782) [#1745](https://github.com/vllm-project/vllm-ascend/pull/1745)" + +#: ../../source/user_guide/release_notes.md:960 +msgid "" +"DeepSeek MLA chunked prefill/graph mode/multistream improvement and " +"bugfix: [#1240](https://github.com/vllm-project/vllm-ascend/pull/1240) " +"[#933](https://github.com/vllm-project/vllm-ascend/pull/933) " +"[#1135](https://github.com/vllm-project/vllm-ascend/pull/1135) " +"[#1311](https://github.com/vllm-project/vllm-ascend/pull/1311) " +"[#1750](https://github.com/vllm-project/vllm-ascend/pull/1750) " +"[#1872](https://github.com/vllm-project/vllm-ascend/pull/1872) " +"[#2170](https://github.com/vllm-project/vllm-ascend/pull/2170) " +"[#1551](https://github.com/vllm-project/vllm-ascend/pull/1551)" +msgstr "" +"DeepSeek MLA 分块预填充/图模式/多流改进与错误修复:[#1240](https://github.com/vllm-project/vllm-ascend/pull/1240) [#933](https://github.com/vllm-project/vllm-ascend/pull/933) [#1135](https://github.com/vllm-project/vllm-ascend/pull/1135) [#1311](https://github.com/vllm-project/vllm-ascend/pull/1311) [#1750](https://github.com/vllm-project/vllm-ascend/pull/1750) [#1872](https://github.com/vllm-project/vllm-ascend/pull/1872) [#2170](https://github.com/vllm-project/vllm-ascend/pull/2170) [#1551](https://github.com/vllm-project/vllm-ascend/pull/1551)" + +#: ../../source/user_guide/release_notes.md:961 +msgid "" +"Qwen2.5 VL improvement via mrope/padding mechanism improvement: " +"[#1261](https://github.com/vllm-project/vllm-ascend/pull/1261) " +"[#1705](https://github.com/vllm-project/vllm-ascend/pull/1705) " +"[#1929](https://github.com/vllm-project/vllm-ascend/pull/1929) " +"[#2007](https://github.com/vllm-project/vllm-ascend/pull/2007)" +msgstr "" +"通过改进 mrope/填充机制提升 Qwen2.5 VL:[#1261](https://github.com/vllm-project/vllm-ascend/pull/1261) [#1705](https://github.com/vllm-project/vllm-ascend/pull/1705) [#1929](https://github.com/vllm-project/vllm-ascend/pull/1929) [#2007](https://github.com/vllm-project/vllm-ascend/pull/2007)" + +#: ../../source/user_guide/release_notes.md:962 +msgid "" +"Ray: Fix the device error when using ray and add initialize_cache and " +"improve warning info: [#1234](https://github.com/vllm-project/vllm-" +"ascend/pull/1234) [#1501](https://github.com/vllm-project/vllm-" +"ascend/pull/1501)" +msgstr "" +"Ray:修复使用 ray 时的设备错误,添加 initialize_cache 并改进警告信息:[#1234](https://github.com/vllm-project/vllm-ascend/pull/1234) [#1501](https://github.com/vllm-project/vllm-ascend/pull/1501)" + +#: ../../source/user_guide/release_notes.md:964 +msgid "Graph Mode Improvement" +msgstr "图模式改进" + +#: ../../source/user_guide/release_notes.md:966 +msgid "" +"Fix DeepSeek with deepseek with mc2 in [#1269](https://github.com/vllm-" +"project/vllm-ascend/pull/1269)" +msgstr "" +"在 [#1269](https://github.com/vllm-project/vllm-ascend/pull/1269) 中修复了 DeepSeek 与 mc2 的问题。" + +#: ../../source/user_guide/release_notes.md:967 +msgid "" +"Fix accuracy problem for deepseek V3/R1 models with torchair graph in " +"long sequence predictions in [#1332](https://github.com/vllm-project" +"/vllm-ascend/pull/1332)" +msgstr "" +"在 [#1332](https://github.com/vllm-project/vllm-ascend/pull/1332) 中修复了 deepseek V3/R1 模型在使用 torchair 图进行长序列预测时的精度问题。" + +#: ../../source/user_guide/release_notes.md:968 +msgid "" +"Fix torchair_graph_batch_sizes bug in [#1570](https://github.com/vllm-" +"project/vllm-ascend/pull/1570)" 
+msgstr "" +"在 [#1570](https://github.com/vllm-project/vllm-ascend/pull/1570) 中修复了 torchair_graph_batch_sizes 错误。" + +#: ../../source/user_guide/release_notes.md:969 +msgid "" +"Enable the limit of tp <= 4 for torchair graph mode in " +"[#1404](https://github.com/vllm-project/vllm-ascend/pull/1404)" +msgstr "" +"在 [#1404](https://github.com/vllm-project/vllm-ascend/pull/1404) 中为 torchair 图模式启用了 tp <= 4 的限制。" + +#: ../../source/user_guide/release_notes.md:970 +msgid "" +"Fix rope accuracy bug [#1887](https://github.com/vllm-project/vllm-" +"ascend/pull/1887)" +msgstr "" +"修复 rope 精度错误 [#1887](https://github.com/vllm-project/vllm-ascend/pull/1887)" + +#: ../../source/user_guide/release_notes.md:971 +msgid "" +"Support multistream of shared experts in FusedMoE " +"[#997](https://github.com/vllm-project/vllm-ascend/pull/997)" +msgstr "" +"支持 FusedMoE 中共享专家的多流处理 [#997](https://github.com/vllm-project/vllm-ascend/pull/997)" + +#: ../../source/user_guide/release_notes.md:972 +msgid "" +"Enable kvcache_nz for the decode process in torchair graph " +"mode[#1098](https://github.com/vllm-project/vllm-ascend/pull/1098)" +msgstr "" +"在 torchair 图模式的解码过程中启用 kvcache_nz [#1098](https://github.com/vllm-project/vllm-ascend/pull/1098)" + +#: ../../source/user_guide/release_notes.md:973 +msgid "" +"Fix chunked-prefill with torchair case to resolve UnboundLocalError: " +"local variable 'decode_hs_or_q_c' issue in [#1378](https://github.com" +"/vllm-project/vllm-ascend/pull/1378)" +msgstr "" +"修复结合 torchair 的分块预填充场景,解决 UnboundLocalError: local variable 'decode_hs_or_q_c' 问题 [#1378](https://github.com/vllm-project/vllm-ascend/pull/1378)" + +#: ../../source/user_guide/release_notes.md:974 +msgid "" +"Improve shared experts multi-stream perf for w8a8 dynamic. in " +"[#1561](https://github.com/vllm-project/vllm-ascend/pull/1561)" +msgstr "" +"在 [#1561](https://github.com/vllm-project/vllm-ascend/pull/1561) 中改进了 w8a8 动态量化下共享专家的多流性能。" + +#: ../../source/user_guide/release_notes.md:975 +msgid "" +"Repair moe error when set multistream. in [#1882](https://github.com" +"/vllm-project/vllm-ascend/pull/1882)" +msgstr "" +"在 [#1882](https://github.com/vllm-project/vllm-ascend/pull/1882) 中修复了设置多流时的 moe 错误。" + +#: ../../source/user_guide/release_notes.md:976 +msgid "" +"Round up graph batch size to tp size in EP case " +"[#1610](https://github.com/vllm-project/vllm-ascend/pull/1610)" +msgstr "" +"在 EP 场景下将图批次大小向上取整至 tp 大小 [#1610](https://github.com/vllm-project/vllm-ascend/pull/1610)" + +#: ../../source/user_guide/release_notes.md:977 +msgid "" +"Fix torchair bug when DP is enabled in [#1727](https://github.com/vllm-" +"project/vllm-ascend/pull/1727)" +msgstr "" +"在 [#1727](https://github.com/vllm-project/vllm-ascend/pull/1727) 中修复了启用 DP 时的 torchair 错误。" + +#: ../../source/user_guide/release_notes.md:978 +msgid "" +"Add extra checking to torchair_graph_config. 
in " +"[#1675](https://github.com/vllm-project/vllm-ascend/pull/1675)" +msgstr "" +"在 [#1675](https://github.com/vllm-project/vllm-ascend/pull/1675) 中为 torchair_graph_config 添加了额外检查。" + +#: ../../source/user_guide/release_notes.md:979 +msgid "" +"Fix rope bug in torchair+chunk-prefill scenario in " +"[#1693](https://github.com/vllm-project/vllm-ascend/pull/1693)" +msgstr "" +"在 [#1693](https://github.com/vllm-project/vllm-ascend/pull/1693) 中修复了 torchair+分块预填充场景下的 rope 错误。" + +#: ../../source/user_guide/release_notes.md:980 +msgid "" +"torchair_graph bugfix when chunked_prefill is true in " +"[#1748](https://github.com/vllm-project/vllm-ascend/pull/1748)" +msgstr "" +"在 [#1748](https://github.com/vllm-project/vllm-ascend/pull/1748) 中修复了 chunked_prefill 为 true 时的 torchair_graph 错误。" + +#: ../../source/user_guide/release_notes.md:981 +msgid "" +"Improve prefill optimization to support torchair graph mode in " +"[#2090](https://github.com/vllm-project/vllm-ascend/pull/2090)" +msgstr "" +"在 [#2090](https://github.com/vllm-project/vllm-ascend/pull/2090) 中改进了预填充优化以支持 torchair 图模式。" + +#: ../../source/user_guide/release_notes.md:982 +msgid "" +"Fix rank set in DP scenario [#1247](https://github.com/vllm-project/vllm-" +"ascend/pull/1247)" +msgstr "" +"修复 DP 场景下的 rank 设置 [#1247](https://github.com/vllm-project/vllm-ascend/pull/1247)" + +#: ../../source/user_guide/release_notes.md:983 +msgid "" +"Reset all unused positions to prevent out-of-bounds to resolve GatherV3 " +"bug in [#1397](https://github.com/vllm-project/vllm-ascend/pull/1397)" +msgstr "" +"重置所有未使用的位置以防止越界,以解决 GatherV3 错误 [#1397](https://github.com/vllm-project/vllm-ascend/pull/1397)" + +#: ../../source/user_guide/release_notes.md:984 +msgid "" +"Remove duplicate multimodal codes in ModelRunner in " +"[#1393](https://github.com/vllm-project/vllm-ascend/pull/1393)" +msgstr "" +"移除 ModelRunner 中的重复多模态代码 [#1393](https://github.com/vllm-project/vllm-" +"ascend/pull/1393)" + +#: ../../source/user_guide/release_notes.md:985 +msgid "" +"Fix block table shape to resolve accuracy issue in " +"[#1297](https://github.com/vllm-project/vllm-ascend/pull/1297)" +msgstr "" +"修复块表形状以解决准确性问题 [#1297](https://github.com/vllm-project/vllm-" +"ascend/pull/1297)" + +#: ../../source/user_guide/release_notes.md:986 +msgid "" +"Implement primal full graph with limited scenario in " +"[#1503](https://github.com/vllm-project/vllm-ascend/pull/1503)" +msgstr "" +"在有限场景下实现原始全图 [#1503](https://github.com/vllm-project/vllm-" +"ascend/pull/1503)" + +#: ../../source/user_guide/release_notes.md:987 +msgid "" +"Restore paged attention kernel in Full Graph for performance in " +"[#1677](https://github.com/vllm-project/vllm-ascend/pull/1677)" +msgstr "" +"在全图中恢复分页注意力内核以提升性能 [#1677](https://github.com/vllm-project/vllm-" +"ascend/pull/1677)" + +#: ../../source/user_guide/release_notes.md:988 +msgid "" +"Fix DeepSeek OOM issue in extreme `--gpu-memory-utilization` scenario in " +"[#1829](https://github.com/vllm-project/vllm-ascend/pull/1829)" +msgstr "" +"修复 DeepSeek 在极端 `--gpu-memory-utilization` 场景下的 OOM 问题 " +"[#1829](https://github.com/vllm-project/vllm-ascend/pull/1829)" + +#: ../../source/user_guide/release_notes.md:989 +msgid "" +"Turn off aclgraph when enabling TorchAir in [#2154](https://github.com" +"/vllm-project/vllm-ascend/pull/2154)" +msgstr "" +"启用 TorchAir 时关闭 aclgraph [#2154](https://github.com/vllm-project/vllm-" +"ascend/pull/2154)" + +#: ../../source/user_guide/release_notes.md:991 +msgid "Operator Improvement" +msgstr "算子改进" + +#: 
../../source/user_guide/release_notes.md:993 +msgid "" +"Added custom AscendC kernel `vocabparallelembedding` " +"[#796](https://github.com/vllm-project/vllm-ascend/pull/796)" +msgstr "" +"添加自定义 AscendC 内核 `vocabparallelembedding` [#796](https://github.com/vllm-" +"project/vllm-ascend/pull/796)" + +#: ../../source/user_guide/release_notes.md:994 +msgid "" +"Fixed rope sin/cos cache bug in [#1267](https://github.com/vllm-project" +"/vllm-ascend/pull/1267)" +msgstr "" +"修复 rope sin/cos 缓存错误 [#1267](https://github.com/vllm-project/vllm-" +"ascend/pull/1267)" + +#: ../../source/user_guide/release_notes.md:995 +msgid "" +"Refactored AscendFusedMoE (#1229) in [#1264](https://github.com/vllm-" +"project/vllm-ascend/pull/1264)" +msgstr "" +"重构 AscendFusedMoE (#1229) [#1264](https://github.com/vllm-project/vllm-" +"ascend/pull/1264)" + +#: ../../source/user_guide/release_notes.md:996 +msgid "" +"Used fused ops npu_top_k_top_p in sampler [#1920](https://github.com" +"/vllm-project/vllm-ascend/pull/1920)" +msgstr "" +"在采样器中使用融合算子 npu_top_k_top_p [#1920](https://github.com/vllm-project" +"/vllm-ascend/pull/1920)" + +#: ../../source/user_guide/release_notes.md:1000 +msgid "" +"Upgraded CANN to 8.2.rc1 in [#2036](https://github.com/vllm-project/vllm-" +"ascend/pull/2036)" +msgstr "" +"将 CANN 升级至 8.2.rc1 [#2036](https://github.com/vllm-project/vllm-" +"ascend/pull/2036)" + +#: ../../source/user_guide/release_notes.md:1001 +msgid "" +"Upgraded torch-npu to 2.5.1.post1 in [#2135](https://github.com/vllm-" +"project/vllm-ascend/pull/2135)" +msgstr "" +"将 torch-npu 升级至 2.5.1.post1 [#2135](https://github.com/vllm-project/vllm-" +"ascend/pull/2135)" + +#: ../../source/user_guide/release_notes.md:1002 +msgid "" +"Upgraded python to 3.11 in [#2136](https://github.com/vllm-project/vllm-" +"ascend/pull/2136)" +msgstr "" +"将 Python 升级至 3.11 [#2136](https://github.com/vllm-project/vllm-" +"ascend/pull/2136)" + +#: ../../source/user_guide/release_notes.md:1003 +msgid "" +"Disabled quantization in mindie_turbo in [#1749](https://github.com" +"/vllm-project/vllm-ascend/pull/1749)" +msgstr "" +"在 mindie_turbo 中禁用量化 [#1749](https://github.com/vllm-project/vllm-" +"ascend/pull/1749)" + +#: ../../source/user_guide/release_notes.md:1004 +msgid "" +"Fixed v0 spec decode in [#1323](https://github.com/vllm-project/vllm-" +"ascend/pull/1323)" +msgstr "" +"修复 v0 推测解码 [#1323](https://github.com/vllm-project/vllm-ascend/pull/1323)" + +#: ../../source/user_guide/release_notes.md:1005 +msgid "" +"Enabled `ACL_OP_INIT_MODE=1` directly only when using V0 spec decode in " +"[#1271](https://github.com/vllm-project/vllm-ascend/pull/1271)" +msgstr "" +"仅在使用 V0 推测解码时直接启用 `ACL_OP_INIT_MODE=1` [#1271](https://github.com" +"/vllm-project/vllm-ascend/pull/1271)" + +#: ../../source/user_guide/release_notes.md:1006 +msgid "" +"Refactoring forward_context and model_runner_v1 in " +"[#1422](https://github.com/vllm-project/vllm-ascend/pull/1422)" +msgstr "" +"重构 forward_context 和 model_runner_v1 [#1422](https://github.com/vllm-project" +"/vllm-ascend/pull/1422)" + +#: ../../source/user_guide/release_notes.md:1007 +msgid "" +"Fixed sampling params in [#1423](https://github.com/vllm-project/vllm-" +"ascend/pull/1423)" +msgstr "" +"修复采样参数 [#1423](https://github.com/vllm-project/vllm-ascend/pull/1423)" + +#: ../../source/user_guide/release_notes.md:1008 +msgid "" +"Added a switch for enabling NZ layout in weights and enable NZ for GMM. 
" +"in [#1409](https://github.com/vllm-project/vllm-ascend/pull/1409)" +msgstr "" +"添加权重启用 NZ 布局的开关并为 GMM 启用 NZ [#1409](https://github.com/vllm-" +"project/vllm-ascend/pull/1409)" + +#: ../../source/user_guide/release_notes.md:1009 +msgid "" +"Resolved bug in ascend_forward_context in [#1449](https://github.com" +"/vllm-project/vllm-ascend/pull/1449) [#1554](https://github.com/vllm-" +"project/vllm-ascend/pull/1554) [#1598](https://github.com/vllm-project" +"/vllm-ascend/pull/1598)" +msgstr "" +"修复 ascend_forward_context 中的错误 [#1449](https://github.com/vllm-project" +"/vllm-ascend/pull/1449) [#1554](https://github.com/vllm-project/vllm-" +"ascend/pull/1554) [#1598](https://github.com/vllm-project/vllm-ascend/pull/1598)" + +#: ../../source/user_guide/release_notes.md:1010 +msgid "" +"Address PrefillCacheHit state to fix prefix cache accuracy bug in " +"[#1492](https://github.com/vllm-project/vllm-ascend/pull/1492)" +msgstr "" +"处理 PrefillCacheHit 状态以修复前缀缓存准确性问题 [#1492](https://github.com" +"/vllm-project/vllm-ascend/pull/1492)" + +#: ../../source/user_guide/release_notes.md:1011 +msgid "" +"Fixed load weight error and add new e2e case in " +"[#1651](https://github.com/vllm-project/vllm-ascend/pull/1651)" +msgstr "" +"修复加载权重错误并添加新的端到端测试用例 [#1651](https://github.com/vllm-project" +"/vllm-ascend/pull/1651)" + +#: ../../source/user_guide/release_notes.md:1012 +msgid "" +"Optimized the number of rope-related index selections in deepseek. in " +"[#1614](https://github.com/vllm-project/vllm-ascend/pull/1614)" +msgstr "" +"优化 DeepSeek 中与 rope 相关的索引选择次数 [#1614](https://github.com/vllm-" +"project/vllm-ascend/pull/1614)" + +#: ../../source/user_guide/release_notes.md:1013 +msgid "" +"Added mc2 mask in [#1642](https://github.com/vllm-project/vllm-" +"ascend/pull/1642)" +msgstr "" +"添加 mc2 掩码 [#1642](https://github.com/vllm-project/vllm-ascend/pull/1642)" + +#: ../../source/user_guide/release_notes.md:1014 +msgid "" +"Fixed static EPLB log2phy condition and improve unit test in " +"[#1667](https://github.com/vllm-project/vllm-ascend/pull/1667) " +"[#1896](https://github.com/vllm-project/vllm-ascend/pull/1896) " +"[#2003](https://github.com/vllm-project/vllm-ascend/pull/2003)" +msgstr "" +"修复静态 EPLB log2phy 条件并改进单元测试 [#1667](https://github.com/vllm-project" +"/vllm-ascend/pull/1667) [#1896](https://github.com/vllm-project/vllm-" +"ascend/pull/1896) [#2003](https://github.com/vllm-project/vllm-ascend/pull/2003)" + +#: ../../source/user_guide/release_notes.md:1015 +msgid "" +"Added chunk mc2 for prefill in [#1703](https://github.com/vllm-project" +"/vllm-ascend/pull/1703)" +msgstr "" +"为预填充添加分块 mc2 [#1703](https://github.com/vllm-project/vllm-" +"ascend/pull/1703)" + +#: ../../source/user_guide/release_notes.md:1016 +msgid "" +"Fixed mc2 op GroupCoordinator bug in [#1711](https://github.com/vllm-" +"project/vllm-ascend/pull/1711)" +msgstr "" +"修复 mc2 算子 GroupCoordinator 错误 [#1711](https://github.com/vllm-project" +"/vllm-ascend/pull/1711)" + +#: ../../source/user_guide/release_notes.md:1017 +msgid "" +"Fixed the failure to recognize the actual type of quantization in " +"[#1721](https://github.com/vllm-project/vllm-ascend/pull/1721)" +msgstr "" +"修复无法识别量化实际类型的问题 [#1721](https://github.com/vllm-project/vllm-" +"ascend/pull/1721)" + +#: ../../source/user_guide/release_notes.md:1018 +msgid "" +"Fixed DeepSeek bug when tp_size == 1 in [#1755](https://github.com/vllm-" +"project/vllm-ascend/pull/1755)" +msgstr "" +"修复 tp_size == 1 时的 DeepSeek 错误 [#1755](https://github.com/vllm-project" 
+"/vllm-ascend/pull/1755)" + +#: ../../source/user_guide/release_notes.md:1019 +msgid "" +"Added support for delay-free blocks in prefill nodes in " +"[#1691](https://github.com/vllm-project/vllm-ascend/pull/1691)" +msgstr "" +"在预填充节点中添加对无延迟块的支持 [#1691](https://github.com/vllm-project/vllm-" +"ascend/pull/1691)" + +#: ../../source/user_guide/release_notes.md:1020 +msgid "" +"MoE alltoallv communication optimization for unquantized RL training & " +"alltoallv support dpo in [#1547](https://github.com/vllm-project/vllm-" +"ascend/pull/1547)" +msgstr "" +"针对未量化 RL 训练的 MoE alltoallv 通信优化 & alltoallv 支持 dpo " +"[#1547](https://github.com/vllm-project/vllm-ascend/pull/1547)" + +#: ../../source/user_guide/release_notes.md:1021 +msgid "" +"Adapted dispatchV2 interface in [#1822](https://github.com/vllm-project" +"/vllm-ascend/pull/1822)" +msgstr "" +"适配 dispatchV2 接口 [#1822](https://github.com/vllm-project/vllm-" +"ascend/pull/1822)" + +#: ../../source/user_guide/release_notes.md:1022 +msgid "" +"Fixed disaggregate prefill hang issue in long output in " +"[#1807](https://github.com/vllm-project/vllm-ascend/pull/1807)" +msgstr "" +"修复长输出中 disaggregate prefill 挂起问题 [#1807](https://github.com/vllm-" +"project/vllm-ascend/pull/1807)" + +#: ../../source/user_guide/release_notes.md:1023 +msgid "" +"Fixed flashcomm_v1 when engine v0 in [#1859](https://github.com/vllm-" +"project/vllm-ascend/pull/1859)" +msgstr "" +"修复 engine v0 时的 flashcomm_v1 问题 [#1859](https://github.com/vllm-project" +"/vllm-ascend/pull/1859)" + +#: ../../source/user_guide/release_notes.md:1024 +msgid "" +"ep_group is not equal to word_size in some cases in " +"[#1862](https://github.com/vllm-project/vllm-ascend/pull/1862)." +msgstr "" +"在某些情况下 ep_group 不等于 word_size [#1862](https://github.com/vllm-project" +"/vllm-ascend/pull/1862)." + +#: ../../source/user_guide/release_notes.md:1025 +msgid "" +"Fixed wheel glibc version incompatibility in [#1808](https://github.com" +"/vllm-project/vllm-ascend/pull/1808)." +msgstr "" +"修复 wheel glibc 版本不兼容问题 [#1808](https://github.com/vllm-project/vllm-" +"ascend/pull/1808)." + +#: ../../source/user_guide/release_notes.md:1026 +msgid "" +"Fixed mc2 process group to resolve self.cpu_group is None in " +"[#1831](https://github.com/vllm-project/vllm-ascend/pull/1831)." +msgstr "" +"修复 mc2 进程组以解决 self.cpu_group 为 None 的问题 [#1831](https://github.com" +"/vllm-project/vllm-ascend/pull/1831)." + +#: ../../source/user_guide/release_notes.md:1027 +msgid "" +"Pin vllm version to v0.9.1 to make mypy check passed in " +"[#1904](https://github.com/vllm-project/vllm-ascend/pull/1904)." +msgstr "" +"将 vllm 版本固定为 v0.9.1 以使 mypy 检查通过 [#1904](https://github.com/vllm-" +"project/vllm-ascend/pull/1904)。" + +#: ../../source/user_guide/release_notes.md:1028 +msgid "" +"Applied npu_moe_gating_top_k_softmax for moe to improve perf in " +"[#1902](https://github.com/vllm-project/vllm-ascend/pull/1902)." +msgstr "" +"为 MoE 应用了 npu_moe_gating_top_k_softmax 以提升性能 [#1902](https://github.com" +"/vllm-project/vllm-ascend/pull/1902)。" + +#: ../../source/user_guide/release_notes.md:1029 +msgid "" +"Fixed bug in path_decorator when engine v0 in [#1919](https://github.com" +"/vllm-project/vllm-ascend/pull/1919)." +msgstr "" +"修复了 engine v0 中 path_decorator 的 bug [#1919](https://github.com/vllm-" +"project/vllm-ascend/pull/1919)。" + +#: ../../source/user_guide/release_notes.md:1030 +msgid "" +"Avoid performing cpu all_reduce in disaggregated-prefill scenario in " +"[#1644](https://github.com/vllm-project/vllm-ascend/pull/1644)." 
+msgstr "" +"在解耦预填充场景中避免执行 CPU all_reduce [#1644](https://github.com/vllm-" +"project/vllm-ascend/pull/1644)。" + +#: ../../source/user_guide/release_notes.md:1031 +msgid "" +"Added super kernel in decode MoE in [#1916](https://github.com/vllm-" +"project/vllm-ascend/pull/1916)" +msgstr "" +"在解码 MoE 中添加了超级内核 [#1916](https://github.com/vllm-project/vllm-" +"ascend/pull/1916)" + +#: ../../source/user_guide/release_notes.md:1032 +msgid "" +"[Prefill Perf] Parallel Strategy Optimizations (VRAM-for-Speed Tradeoff) " +"in [#1802](https://github.com/vllm-project/vllm-ascend/pull/1802)." +msgstr "" +"[预填充性能] 并行策略优化(显存换速度权衡) [#1802](https://github.com/vllm-project" +"/vllm-ascend/pull/1802)。" + +#: ../../source/user_guide/release_notes.md:1033 +msgid "" +"Removed unnecessary reduce_results access in shared_experts.down_proj in " +"[#2016](https://github.com/vllm-project/vllm-ascend/pull/2016)." +msgstr "" +"移除了 shared_experts.down_proj 中不必要的 reduce_results 访问 [#2016](https://github.com" +"/vllm-project/vllm-ascend/pull/2016)。" + +#: ../../source/user_guide/release_notes.md:1034 +msgid "" +"Optimized greedy reject sampler with vectorization in " +"[#2002](https://github.com/vllm-project/vllm-ascend/pull/2002)." +msgstr "" +"通过向量化优化了贪婪拒绝采样器 [#2002](https://github.com/vllm-project/vllm-" +"ascend/pull/2002)。" + +#: ../../source/user_guide/release_notes.md:1035 +msgid "" +"Made multiple Ps and Ds work on a single machine in " +"[#1936](https://github.com/vllm-project/vllm-ascend/pull/1936)." +msgstr "" +"使多个 P 节点和 D 节点能在单机上工作 [#1936](https://github.com/vllm-project/vllm-" +"ascend/pull/1936)。" + +#: ../../source/user_guide/release_notes.md:1036 +msgid "" +"Fixed the shape conflicts between shared & routed experts for deepseek " +"model when tp > 1 and multistream_moe enabled in " +"[#2075](https://github.com/vllm-project/vllm-ascend/pull/2075)." +msgstr "" +"修复了 deepseek 模型在 tp > 1 且启用 multistream_moe 时,共享专家与路由专家之间的形状冲突 " +"[#2075](https://github.com/vllm-project/vllm-ascend/pull/2075)。" + +#: ../../source/user_guide/release_notes.md:1037 +msgid "" +"Added CPU binding support [#2031](https://github.com/vllm-project/vllm-" +"ascend/pull/2031)." +msgstr "" +"添加了 CPU 绑定支持 [#2031](https://github.com/vllm-project/vllm-ascend/pull/2031)。" + +#: ../../source/user_guide/release_notes.md:1038 +msgid "" +"Added with_prefill cpu allreduce to handle D-node recomputation in " +"[#2129](https://github.com/vllm-project/vllm-ascend/pull/2129)." +msgstr "" +"添加了 with_prefill CPU allreduce 以处理 D 节点重计算 [#2129](https://github.com" +"/vllm-project/vllm-ascend/pull/2129)。" + +#: ../../source/user_guide/release_notes.md:1039 +msgid "" +"Added D2H & initRoutingQuantV2 to improve prefill perf in " +"[#2038](https://github.com/vllm-project/vllm-ascend/pull/2038)." +msgstr "" +"添加了 D2H 和 initRoutingQuantV2 以提升预填充性能 [#2038](https://github.com/vllm-" +"project/vllm-ascend/pull/2038)。" + +#: ../../source/user_guide/release_notes.md:1043 +msgid "" +"Provide an e2e guide for execute duration profiling " +"[#1113](https://github.com/vllm-project/vllm-ascend/pull/1113)" +msgstr "" +"提供了执行时长性能分析的端到端指南 [#1113](https://github.com/vllm-project/vllm-" +"ascend/pull/1113)" + +#: ../../source/user_guide/release_notes.md:1044 +msgid "" +"Add Referer header for CANN package download url. 
" +"[#1192](https://github.com/vllm-project/vllm-ascend/pull/1192)" +msgstr "" +"为 CANN 包下载 URL 添加 Referer 请求头 [#1192](https://github.com/vllm-project" +"/vllm-ascend/pull/1192)" + +#: ../../source/user_guide/release_notes.md:1045 +msgid "" +"Add reinstall instructions doc [#1370](https://github.com/vllm-project" +"/vllm-ascend/pull/1370)" +msgstr "" +"添加了重新安装说明文档 [#1370](https://github.com/vllm-project/vllm-ascend/pull/1370)" + +#: ../../source/user_guide/release_notes.md:1046 +msgid "" +"Update Disaggregate prefill README [#1379](https://github.com/vllm-" +"project/vllm-ascend/pull/1379)" +msgstr "" +"更新了解耦预填充 README [#1379](https://github.com/vllm-project/vllm-ascend/pull/1379)" + +#: ../../source/user_guide/release_notes.md:1047 +msgid "" +"Disaggregate prefill for kv cache register style " +"[#1296](https://github.com/vllm-project/vllm-ascend/pull/1296)" +msgstr "" +"支持 KV 缓存寄存器风格解耦预填充 [#1296](https://github.com/vllm-project/vllm-" +"ascend/pull/1296)" + +#: ../../source/user_guide/release_notes.md:1048 +msgid "" +"Fix errors and non-standard parts in " +"examples/disaggregate_prefill_v1/README.md in [#1965](https://github.com" +"/vllm-project/vllm-ascend/pull/1965)" +msgstr "" +"修复了 examples/disaggregate_prefill_v1/README.md 中的错误和非标准部分 " +"[#1965](https://github.com/vllm-project/vllm-ascend/pull/1965)" + +#: ../../source/user_guide/release_notes.md:1052 +msgid "" +"Full graph mode support are not yet available for specific hardware types" +" with full_cuda_graphenable. [#2182](https://github.com/vllm-project" +"/vllm-ascend/issues/2182)" +msgstr "" +"对于特定硬件类型,使用 full_cuda_graphenable 的全图模式支持尚不可用 [#2182](https://github.com" +"/vllm-project/vllm-ascend/issues/2182)。" + +#: ../../source/user_guide/release_notes.md:1053 +msgid "" +"Qwen3 MoE aclgraph mode with tp failed when enable ep due to bincount " +"error [#2226](https://github.com/vllm-project/vllm-ascend/issues/2226)" +msgstr "" +"启用 EP 时,带 TP 的 Qwen3 MoE aclgraph 模式因 bincount 错误而失败 " +"[#2226](https://github.com/vllm-project/vllm-ascend/issues/2226)" + +#: ../../source/user_guide/release_notes.md:1054 +msgid "" +"As mentioned in the v0.9.1rc1 release note, Atlas 300I series support " +"will NOT be included." +msgstr "如 v0.9.1rc1 版本说明所述,将不包含对 Atlas 300I 系列的支持。" + +#: ../../source/user_guide/release_notes.md:1056 +msgid "v0.9.2rc1 - 2025.07.11" +msgstr "v0.9.2rc1 - 2025年7月11日" + +#: ../../source/user_guide/release_notes.md:1058 +msgid "" +"This is the 1st release candidate of v0.9.2 for vLLM Ascend. Please " +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.9.2rc1) to get started. From this release, V1 engine will " +"be enabled by default, there is no need to set `VLLM_USE_V1=1` any more. " +"And this release is the last version to support V0 engine, V0 code will " +"be clean up in the future." +msgstr "" +"这是 vLLM Ascend v0.9.2 的第一个候选发布版本。请遵循[官方文档](https://github.com/vllm-" +"project/vllm-ascend/tree/v0.9.2rc1)开始使用。从本版本起,V1 引擎将默认启用,不再需要设置 " +"`VLLM_USE_V1=1`。并且本版本是最后一个支持 V0 引擎的版本,V0 代码将在未来被清理。" + +#: ../../source/user_guide/release_notes.md:1062 +msgid "" +"Pooling model works with V1 engine now. You can take a try with Qwen3 " +"embedding model [#1359](https://github.com/vllm-project/vllm-" +"ascend/pull/1359)." +msgstr "" +"Pooling 模型现在可与 V1 引擎协同工作。您可以尝试使用 Qwen3 embedding 模型 " +"[#1359](https://github.com/vllm-project/vllm-ascend/pull/1359)。" + +#: ../../source/user_guide/release_notes.md:1063 +msgid "" +"The performance on Atlas 300I series has been improved. 
" +"[#1591](https://github.com/vllm-project/vllm-ascend/pull/1591)" +msgstr "" +"Atlas 300I 系列的性能已得到提升 [#1591](https://github.com/vllm-project/vllm-" +"ascend/pull/1591)。" + +#: ../../source/user_guide/release_notes.md:1064 +msgid "" +"aclgraph mode works with Moe models now. Currently, only Qwen3 Moe is " +"well tested. [#1381](https://github.com/vllm-project/vllm-" +"ascend/pull/1381)" +msgstr "" +"aclgraph 模式现在可与 MoE 模型协同工作。目前,仅对 Qwen3 MoE 进行了充分测试 " +"[#1381](https://github.com/vllm-project/vllm-ascend/pull/1381)。" + +#: ../../source/user_guide/release_notes.md:1068 +msgid "" +"Ascend PyTorch adapter (torch_npu) has been upgraded to " +"`2.5.1.post1.dev20250619`. Don’t forget to update it in your environment." +" [#1347](https://github.com/vllm-project/vllm-ascend/pull/1347)" +msgstr "" +"Ascend PyTorch 适配器 (torch_npu) 已升级至 `2.5.1.post1.dev20250619`。请勿忘记在您的环境中更新它 " +"[#1347](https://github.com/vllm-project/vllm-ascend/pull/1347)。" + +#: ../../source/user_guide/release_notes.md:1069 +msgid "" +"The GatherV3 error has been fixed with aclgraph mode. " +"[#1416](https://github.com/vllm-project/vllm-ascend/pull/1416)" +msgstr "" +"GatherV3 错误已在 aclgraph 模式下修复 [#1416](https://github.com/vllm-project/vllm-" +"ascend/pull/1416)。" + +#: ../../source/user_guide/release_notes.md:1070 +msgid "" +"W8A8 quantization works on Atlas 300I series now. " +"[#1560](https://github.com/vllm-project/vllm-ascend/pull/1560)" +msgstr "" +"W8A8 量化现在可在 Atlas 300I 系列上运行 [#1560](https://github.com/vllm-project" +"/vllm-ascend/pull/1560)。" + +#: ../../source/user_guide/release_notes.md:1071 msgid "" "Fix the accuracy problem with deploy models with parallel parameters. " "[#1678](https://github.com/vllm-project/vllm-ascend/pull/1678)" msgstr "" -"修复了使用并行参数部署模型时的准确性问题。[#1678](https://github.com/vllm-" -"project/vllm-ascend/pull/1678)" +"修复了部署带有并行参数的模型时的精度问题 [#1678](https://github.com/vllm-project/vllm-" +"ascend/pull/1678)。" -#: ../../user_guide/release_notes.md:17 +#: ../../source/user_guide/release_notes.md:1072 msgid "" -"The pre-built wheel package now requires lower version of glibc. Users can " -"use it by `pip install vllm-ascend` directly. [#1582](https://github.com/" -"vllm-project/vllm-ascend/pull/1582)" +"The pre-built wheel package now requires lower version of glibc. Users " +"can use it by `pip install vllm-ascend` directly. " +"[#1582](https://github.com/vllm-project/vllm-ascend/pull/1582)" msgstr "" -"预编译的 wheel 包现在要求更低版本的 glibc。用户可以直接通过 `pip install " -"vllm-ascend` 使用它。[#1582](https://github.com/vllm-project/vllm-ascend/" -"pull/1582)" +"预构建的 wheel 包现在需要更低版本的 glibc。用户可以直接通过 `pip install vllm-ascend` 使用它 " +"[#1582](https://github.com/vllm-project/vllm-ascend/pull/1582)。" -#: ../../user_guide/release_notes.md:19 ../../user_guide/release_notes.md:99 -#: ../../user_guide/release_notes.md:153 ../../user_guide/release_notes.md:177 -#: ../../user_guide/release_notes.md:195 ../../user_guide/release_notes.md:219 -#: ../../user_guide/release_notes.md:242 ../../user_guide/release_notes.md:266 -#: ../../user_guide/release_notes.md:296 -msgid "Other" -msgstr "其它" - -#: ../../user_guide/release_notes.md:20 +#: ../../source/user_guide/release_notes.md:1076 msgid "" -"Official doc has been updated for better read experience. For example, more " -"deployment tutorials are added, user/developer docs are updated. More guide " -"will coming soon." +"Official doc has been updated for better read experience. For example, " +"more deployment tutorials are added, user/developer docs are updated. 
" +"More guide will coming soon." msgstr "" -"官方文档已更新,以提升阅读体验。例如,增加了更多部署教程,用户/开发者文档已" -"更新。更多指南即将推出。" +"官方文档已更新,以提供更好的阅读体验。例如,添加了更多部署教程,更新了用户/开发者文档。更多指南即将推出。" -#: ../../user_guide/release_notes.md:21 +#: ../../source/user_guide/release_notes.md:1077 msgid "" -"Fix accuracy problem for deepseek V3/R1 models with torchair graph in long " -"sequence predictions. [#1331](https://github.com/vllm-project/vllm-ascend/" -"pull/1331)" +"Fix accuracy problem for deepseek V3/R1 models with torchair graph in " +"long sequence predictions. [#1331](https://github.com/vllm-project/vllm-" +"ascend/pull/1331)" msgstr "" -"修复 deepseek V3/R1 模型在使用 torchair 图进行长序列预测时的精度问题。" -"[#1331](https://github.com/vllm-project/vllm-ascend/pull/1331)" +"修复了 deepseek V3/R1 模型在使用 torchair 图进行长序列预测时的精度问题 " +"[#1331](https://github.com/vllm-project/vllm-ascend/pull/1331)。" -#: ../../user_guide/release_notes.md:22 +#: ../../source/user_guide/release_notes.md:1078 msgid "" -"A new env variable `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` has been added. " -"It enables the fused allgather-experts kernel for Deepseek V3/R1 models. " -"The default value is `0`. [#1335](https://github.com/vllm-project/vllm-" -"ascend/pull/1335)" +"A new env variable `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` has been " +"added. It enables the fused allgather-experts kernel for Deepseek V3/R1 " +"models. The default value is `0`. [#1335](https://github.com/vllm-project" +"/vllm-ascend/pull/1335)" msgstr "" -"新增了一个环境变量 `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP`。它用于启用 " -"Deepseek V3/R1 模型的 fused allgather-experts 内核。默认值为 `0`。[#1335]" -"(https://github.com/vllm-project/vllm-ascend/pull/1335)" +"新增了一个环境变量 `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP`。它为 Deepseek V3/R1 模型启用了融合的 " +"allgather-experts 内核。默认值为 `0` [#1335](https://github.com/vllm-project/vllm-" +"ascend/pull/1335)。" -#: ../../user_guide/release_notes.md:23 +#: ../../source/user_guide/release_notes.md:1079 msgid "" "A new env variable `VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION` has been " -"added to improve the performance of topk-topp sampling. The default value " -"is 0, we'll consider to enable it by default in the future[#1732](https://" -"github.com/vllm-project/vllm-ascend/pull/1732)" +"added to improve the performance of topk-topp sampling. 
The default value" +" is 0, we'll consider to enable it by default in the " +"future[#1732](https://github.com/vllm-project/vllm-ascend/pull/1732)" msgstr "" -"新增了一个环境变量 `VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION`,用于提升 " -"topk-topp 采样的性能。该变量默认值为 0,未来我们会考虑默认启用此选项[#1732]" -"(https://github.com/vllm-project/vllm-ascend/pull/1732)。" +"新增了一个环境变量 `VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION`,用于提升 topk-topp " +"采样的性能。默认值为 0,未来我们将考虑默认启用它 [#1732](https://github.com/vllm-project/vllm-" +"ascend/pull/1732)。" -#: ../../user_guide/release_notes.md:24 +#: ../../source/user_guide/release_notes.md:1080 msgid "" -"A batch of bugs have been fixed for Data Parallelism case [#1273](https://" -"github.com/vllm-project/vllm-ascend/pull/1273) [#1322](https://github.com/" -"vllm-project/vllm-ascend/pull/1322) [#1275](https://github.com/vllm-project/" -"vllm-ascend/pull/1275) [#1478](https://github.com/vllm-project/vllm-ascend/" -"pull/1478)" -msgstr "" -"已修复了一批与数据并行相关的 bug [#1273](https://github.com/vllm-project/" -"vllm-ascend/pull/1273) [#1322](https://github.com/vllm-project/vllm-ascend/" -"pull/1322) [#1275](https://github.com/vllm-project/vllm-ascend/pull/1275) " +"A batch of bugs have been fixed for Data Parallelism case " +"[#1273](https://github.com/vllm-project/vllm-ascend/pull/1273) " +"[#1322](https://github.com/vllm-project/vllm-ascend/pull/1322) " +"[#1275](https://github.com/vllm-project/vllm-ascend/pull/1275) " "[#1478](https://github.com/vllm-project/vllm-ascend/pull/1478)" - -#: ../../user_guide/release_notes.md:25 -msgid "" -"The DeepSeek performance has been improved. [#1194](https://github.com/vllm-" -"project/vllm-ascend/pull/1194) [#1395](https://github.com/vllm-project/vllm-" -"ascend/pull/1395) [#1380](https://github.com/vllm-project/vllm-ascend/" -"pull/1380)" msgstr "" -"DeepSeek 的性能已得到提升。[#1194](https://github.com/vllm-project/vllm-" -"ascend/pull/1194) [#1395](https://github.com/vllm-project/vllm-ascend/" -"pull/1395) [#1380](https://github.com/vllm-project/vllm-ascend/pull/1380)" +"已修复一批数据并行场景下的 bug [#1273](https://github.com/vllm-project/vllm-" +"ascend/pull/1273) [#1322](https://github.com/vllm-project/vllm-" +"ascend/pull/1322) [#1275](https://github.com/vllm-project/vllm-" +"ascend/pull/1275) [#1478](https://github.com/vllm-project/vllm-" +"ascend/pull/1478)。" -#: ../../user_guide/release_notes.md:26 +#: ../../source/user_guide/release_notes.md:1081 msgid "" -"Ascend scheduler works with prefix cache now. [#1446](https://github.com/" -"vllm-project/vllm-ascend/pull/1446)" +"The DeepSeek performance has been improved. [#1194](https://github.com" +"/vllm-project/vllm-ascend/pull/1194) [#1395](https://github.com/vllm-" +"project/vllm-ascend/pull/1395) [#1380](https://github.com/vllm-project" +"/vllm-ascend/pull/1380)" msgstr "" -"Ascend 调度器现在支持前缀缓存。[#1446](https://github.com/vllm-project/vllm-" +"DeepSeek 的性能已得到提升 [#1194](https://github.com/vllm-project/vllm-" +"ascend/pull/1194) [#1395](https://github.com/vllm-project/vllm-" +"ascend/pull/1395) [#1380](https://github.com/vllm-project/vllm-" +"ascend/pull/1380)。" + +#: ../../source/user_guide/release_notes.md:1082 +msgid "" +"Ascend scheduler works with prefix cache now. [#1446](https://github.com" +"/vllm-project/vllm-ascend/pull/1446)" +msgstr "" +"Ascend 调度器现已支持前缀缓存。[#1446](https://github.com/vllm-project/vllm-" "ascend/pull/1446)" -#: ../../user_guide/release_notes.md:27 +#: ../../source/user_guide/release_notes.md:1083 msgid "" -"DeepSeek now works with prefix cache now. 
[#1498](https://github.com/vllm-" -"project/vllm-ascend/pull/1498)" +"DeepSeek now works with prefix cache now. [#1498](https://github.com" +"/vllm-project/vllm-ascend/pull/1498)" msgstr "" -"DeepSeek 现在支持前缀缓存了。[#1498](https://github.com/vllm-project/vllm-" +"DeepSeek 现已支持前缀缓存。[#1498](https://github.com/vllm-project/vllm-" "ascend/pull/1498)" -#: ../../user_guide/release_notes.md:28 +#: ../../source/user_guide/release_notes.md:1084 msgid "" -"Support prompt logprobs to recover ceval accuracy in V1 [#1483](https://" -"github.com/vllm-project/vllm-ascend/pull/1483)" +"Support prompt logprobs to recover ceval accuracy in V1 " +"[#1483](https://github.com/vllm-project/vllm-ascend/pull/1483)" msgstr "" -"支持使用 prompt logprobs 恢复 V1 的 ceval 准确率 [#1483](https://github.com/" -"vllm-project/vllm-ascend/pull/1483)" +"支持使用 prompt logprobs 恢复 V1 版本的 ceval 准确率 [#1483](https://github.com/vllm-" +"project/vllm-ascend/pull/1483)" -#: ../../user_guide/release_notes.md:30 +#: ../../source/user_guide/release_notes.md:1088 +msgid "" +"Pipeline parallel does not work with ray and graph mode: " +" " +"" +msgstr "" +"流水线并行目前无法与 ray 和图模式协同工作: " + +#: ../../source/user_guide/release_notes.md:1090 +#: ../../source/user_guide/release_notes.md:1149 +msgid "New Contributors" +msgstr "新贡献者" + +#: ../../source/user_guide/release_notes.md:1092 +msgid "" +"@xleoken made their first contribution in " +msgstr "" +"@xleoken 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1093 +msgid "" +"@lyj-jjj made their first contribution in " +msgstr "" +"@lyj-jjj 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1094 +msgid "" +"@sharonyunyun made their first contribution in " +msgstr "" +"@sharonyunyun 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1095 +msgid "" +"@Pr0Wh1teGivee made their first contribution in " +msgstr "" +"@Pr0Wh1teGivee 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1096 +msgid "" +"@leo-pony made their first contribution in " +msgstr "" +"@leo-pony 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1097 +msgid "" +"@zeshengzong made their first contribution in " +msgstr "" +"@zeshengzong 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1098 +msgid "" +"@GDzhu01 made their first contribution in " +msgstr "" +"@GDzhu01 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1099 +msgid "" +"@Agonixiaoxiao made their first contribution in " +msgstr "" +"@Agonixiaoxiao 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1100 +msgid "" +"@zhanghw0354 made their first contribution in " +msgstr "" +"@zhanghw0354 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1101 +msgid "" +"@farawayboat made their first contribution in " +msgstr "" +"@farawayboat 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1102 +msgid "" +"@ZhengWG made their first contribution in " +msgstr "" +"@ZhengWG 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1103 +msgid "" +"@wm901115nwpu made their first contribution in " +msgstr "" +"@wm901115nwpu 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1105 +msgid "" +"**Full Changelog**: " +msgstr "" +"**完整更新日志**:" + +#: ../../source/user_guide/release_notes.md:1107 msgid "v0.9.1rc1 - 2025.06.22" msgstr "v0.9.1rc1 - 2025.06.22" -#: ../../user_guide/release_notes.md:32 +#: ../../source/user_guide/release_notes.md:1109 msgid "" -"This is the 1st release candidate of v0.9.1 for vLLM Ascend. Please follow " -"the [official doc](https://vllm-ascend.readthedocs.io/en/) to get started." 
+"This is the 1st release candidate of v0.9.1 for vLLM Ascend. Please " +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.9.1rc1) to get started." msgstr "" -"这是 vLLM Ascend v0.9.1 的第一个候选发布版本。请按照[官方文档](https://vllm-" -"ascend.readthedocs.io/en/)开始使用。" +"这是 vLLM Ascend v0.9.1 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-" +"project/vllm-ascend/tree/v0.9.1rc1)开始使用。" -#: ../../user_guide/release_notes.md:36 -msgid "" -"Atlas 300I series is experimental supported in this release. [#1333]" -"(https://github.com/vllm-project/vllm-ascend/pull/1333) After careful " -"consideration, this feature **will NOT be included in v0.9.1-dev branch** " -"taking into account the v0.9.1 release quality and the feature rapid " -"iteration to improve performance on Atlas 300I series. We will improve this " -"from 0.9.2rc1 and later." -msgstr "" -"本版本对 Atlas 300I 系列提供了实验性支持。[#1333](https://github.com/vllm-" -"project/vllm-ascend/pull/1333) 经过慎重考虑,鉴于 v0.9.1 版本发布的质量要求" -"以及 Atlas 300I 系列性能优化的快速迭代,该功能**不会被包含在 v0.9.1-dev 分支" -"中**。我们将在 0.9.2rc1 及之后的版本中进一步完善该功能。" +#: ../../source/user_guide/release_notes.md:1111 +msgid "Experimental" +msgstr "实验性功能" -#: ../../user_guide/release_notes.md:37 +#: ../../source/user_guide/release_notes.md:1113 msgid "" -"Support EAGLE-3 for speculative decoding. [#1032](https://github.com/vllm-" -"project/vllm-ascend/pull/1032)" +"Atlas 300I series is experimental supported in this release (Functional " +"test passed with Qwen2.5-7b-" +"instruct/Qwen2.5-0.5b/Qwen3-0.6B/Qwen3-4B/Qwen3-8B). " +"[#1333](https://github.com/vllm-project/vllm-ascend/pull/1333)" msgstr "" -"支持 EAGLE-3 进行推测式解码。[#1032](https://github.com/vllm-project/vllm-" +"本版本实验性支持 Atlas 300I 系列(已通过 Qwen2.5-7b-instruct/Qwen2.5-0.5b/Qwen3-0.6B/Qwen3-4B/Qwen3-8B 的功能测试)。[#1333](https://github.com/vllm-project/vllm-ascend/pull/1333)" + +#: ../../source/user_guide/release_notes.md:1114 +msgid "" +"Support EAGLE-3 for speculative decoding. [#1032](https://github.com" +"/vllm-project/vllm-ascend/pull/1032)" +msgstr "" +"支持使用 EAGLE-3 进行推测式解码。[#1032](https://github.com/vllm-project/vllm-" "ascend/pull/1032)" -#: ../../user_guide/release_notes.md:40 +#: ../../source/user_guide/release_notes.md:1116 msgid "" -"Ascend PyTorch adapter (torch_npu) has been upgraded to `2.5.1.post1." -"dev20250528`. Don’t forget to update it in your environment. [#1235]" -"(https://github.com/vllm-project/vllm-ascend/pull/1235)" +"After careful consideration, above features **will NOT be included in " +"v0.9.1-dev branch (v0.9.1 final release)** taking into account the v0.9.1" +" release quality and the feature rapid iteration. We will improve this " +"from 0.9.2rc1 and later." msgstr "" -"Ascend PyTorch 适配器(torch_npu)已升级到 `2.5.1.post1.dev20250528`。请不要" -"忘记在您的环境中进行更新。[#1235](https://github.com/vllm-project/vllm-" -"ascend/pull/1235)" +"经过慎重考虑,鉴于 v0.9.1 版本的发布质量要求以及功能的快速迭代,上述功能**将不会被包含在 v0.9.1-dev 分支(即 v0.9.1 最终版本)中**。我们将在 0.9.2rc1 及之后的版本中对其进行改进。" -#: ../../user_guide/release_notes.md:41 +#: ../../source/user_guide/release_notes.md:1120 msgid "" -"Support Atlas 300I series container image. You can get it from [quay.io]" -"(https://quay.io/repository/vllm/vllm-ascend)" +"Ascend PyTorch adapter (torch_npu) has been upgraded to " +"`2.5.1.post1.dev20250528`. Don’t forget to update it in your environment." 
+" [#1235](https://github.com/vllm-project/vllm-ascend/pull/1235)" msgstr "" -"支持Atlas 300I系列的容器镜像。你可以从[quay.io](https://quay.io/repository/" -"vllm/vllm-ascend)获取。" +"Ascend PyTorch 适配器(torch_npu)已升级至 `2.5.1.post1.dev20250528`。请勿忘记在您的环境中更新它。[#1235](https://github.com/vllm-project/vllm-ascend/pull/1235)" -#: ../../user_guide/release_notes.md:42 +#: ../../source/user_guide/release_notes.md:1121 msgid "" -"Fix token-wise padding mechanism to make multi-card graph mode work. [#1300]" -"(https://github.com/vllm-project/vllm-ascend/pull/1300)" +"Support Atlas 300I series container image. You can get it from " +"[quay.io](https://quay.io/repository/vllm/vllm-ascend)" msgstr "" -"修复按 token 填充机制以支持多卡图模式。 [#1300](https://github.com/vllm-" -"project/vllm-ascend/pull/1300)" +"支持 Atlas 300I 系列容器镜像。您可以从 [quay.io](https://quay.io/repository/vllm/vllm-ascend) 获取。" -#: ../../user_guide/release_notes.md:43 +#: ../../source/user_guide/release_notes.md:1122 msgid "" -"Upgrade vllm to 0.9.1 [#1165]https://github.com/vllm-project/vllm-ascend/" -"pull/1165" +"Fix token-wise padding mechanism to make multi-card graph mode work. " +"[#1300](https://github.com/vllm-project/vllm-ascend/pull/1300)" msgstr "" -"将 vllm 升级到 0.9.1 [#1165]https://github.com/vllm-project/vllm-ascend/" -"pull/1165" +"修复了按 token 填充的机制,使多卡图模式能够正常工作。[#1300](https://github.com/vllm-project/vllm-ascend/pull/1300)" -#: ../../user_guide/release_notes.md:45 +#: ../../source/user_guide/release_notes.md:1123 +msgid "" +"Upgrade vLLM to 0.9.1 [#1165](https://github.com/vllm-project/vllm-" +"ascend/pull/1165)" +msgstr "" +"将 vLLM 升级至 0.9.1 [#1165](https://github.com/vllm-project/vllm-ascend/pull/1165)" + +#: ../../source/user_guide/release_notes.md:1125 msgid "Other Improvements" msgstr "其他改进" -#: ../../user_guide/release_notes.md:46 +#: ../../source/user_guide/release_notes.md:1127 msgid "" "Initial support Chunked Prefill for MLA. [#1172](https://github.com/vllm-" "project/vllm-ascend/pull/1172)" msgstr "" -"为MLA初步支持分块预填充。 [#1172](https://github.com/vllm-project/vllm-" -"ascend/pull/1172)" +"为 MLA 初步支持分块预填充。[#1172](https://github.com/vllm-project/vllm-ascend/pull/1172)" -#: ../../user_guide/release_notes.md:47 +#: ../../source/user_guide/release_notes.md:1128 msgid "" "An example of best practices to run DeepSeek with ETP has been added. " "[#1101](https://github.com/vllm-project/vllm-ascend/pull/1101)" msgstr "" -"已新增一个使用 ETP 运行 DeepSeek 的最佳实践示例。[#1101](https://github.com/" -"vllm-project/vllm-ascend/pull/1101)" +"已添加一个使用 ETP 运行 DeepSeek 的最佳实践示例。[#1101](https://github.com/vllm-project/vllm-ascend/pull/1101)" -#: ../../user_guide/release_notes.md:48 +#: ../../source/user_guide/release_notes.md:1129 msgid "" -"Performance improvements for DeepSeek using the TorchAir graph. [#1098]" -"(https://github.com/vllm-project/vllm-ascend/pull/1098), [#1131](https://" -"github.com/vllm-project/vllm-ascend/pull/1131)" +"Performance improvements for DeepSeek using the TorchAir graph. 
" +"[#1098](https://github.com/vllm-project/vllm-ascend/pull/1098), " +"[#1131](https://github.com/vllm-project/vllm-ascend/pull/1131)" msgstr "" -"通过使用 TorchAir 图对 DeepSeek 进行了性能提升。[#1098](https://github.com/" -"vllm-project/vllm-ascend/pull/1098), [#1131](https://github.com/vllm-" -"project/vllm-ascend/pull/1131)" +"通过使用 TorchAir 图,提升了 DeepSeek 的性能。[#1098](https://github.com/vllm-project/vllm-ascend/pull/1098), [#1131](https://github.com/vllm-project/vllm-ascend/pull/1131)" -#: ../../user_guide/release_notes.md:49 +#: ../../source/user_guide/release_notes.md:1130 msgid "" -"Supports the speculative decoding feature with AscendScheduler. [#943]" -"(https://github.com/vllm-project/vllm-ascend/pull/943)" +"Supports the speculative decoding feature with AscendScheduler. " +"[#943](https://github.com/vllm-project/vllm-ascend/pull/943)" msgstr "" -"支持 AscendScheduler 的预测性解码功能。[#943](https://github.com/vllm-" -"project/vllm-ascend/pull/943)" +"支持 AscendScheduler 的推测式解码功能。[#943](https://github.com/vllm-project/vllm-ascend/pull/943)" -#: ../../user_guide/release_notes.md:50 +#: ../../source/user_guide/release_notes.md:1131 msgid "" -"Improve `VocabParallelEmbedding` custom op performance. It will be enabled " -"in the next release. [#796](https://github.com/vllm-project/vllm-ascend/" -"pull/796)" +"Improve `VocabParallelEmbedding` custom op performance. It will be " +"enabled in the next release. [#796](https://github.com/vllm-project/vllm-" +"ascend/pull/796)" msgstr "" -"提升 `VocabParallelEmbedding` 自定义算子的性能。该优化将在下一个版本中启用。" -"[#796](https://github.com/vllm-project/vllm-ascend/pull/796)" +"提升了 `VocabParallelEmbedding` 自定义算子的性能。该优化将在下一个版本中启用。[#796](https://github.com/vllm-project/vllm-ascend/pull/796)" -#: ../../user_guide/release_notes.md:51 +#: ../../source/user_guide/release_notes.md:1132 msgid "" "Fixed a device discovery and setup bug when running vLLM Ascend on Ray " "[#884](https://github.com/vllm-project/vllm-ascend/pull/884)" msgstr "" -"修复了在 Ray 上运行 vLLM Ascend 时的设备发现和设置错误 [#884](https://" -"github.com/vllm-project/vllm-ascend/pull/884)" +"修复了在 Ray 上运行 vLLM Ascend 时的设备发现和设置错误。[#884](https://github.com/vllm-project/vllm-ascend/pull/884)" -#: ../../user_guide/release_notes.md:52 +#: ../../source/user_guide/release_notes.md:1133 msgid "" -"DeepSeek with [MC2](https://www.hiascend.com/document/detail/zh/" -"canncommercial/81RC1/developmentguide/opdevg/ascendcbestP/" -"atlas_ascendc_best_practices_10_0043.html) (Merged Compute and " -"Communication) now works properly. [#1268](https://github.com/vllm-project/" -"vllm-ascend/pull/1268)" -msgstr "" -"DeepSeek 现已可以与 [MC2](https://www.hiascend.com/document/detail/zh/" -"canncommercial/81RC1/developmentguide/opdevg/ascendcbestP/" -"atlas_ascendc_best_practices_10_0043.html)(计算与通信融合)正常工作。" +"DeepSeek with " +"[MC2](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/developmentguide/opdevg/ascendcbestP/atlas_ascendc_best_practices_10_0043.html)" +" (Merged Compute and Communication) now works properly. " "[#1268](https://github.com/vllm-project/vllm-ascend/pull/1268)" - -#: ../../user_guide/release_notes.md:53 -msgid "" -"Fixed log2phy NoneType bug with static EPLB feature. [#1186](https://github." 
-"com/vllm-project/vllm-ascend/pull/1186)" msgstr "" -"修复了带有静态 EPLB 特性时 log2phy 为 NoneType 的 bug。[#1186](https://" -"github.com/vllm-project/vllm-ascend/pull/1186)" +"DeepSeek 现已可以与 " +"[MC2](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/developmentguide/opdevg/ascendcbestP/atlas_ascendc_best_practices_10_0043.html)(计算与通信融合)正常工作。[#1268](https://github.com/vllm-project/vllm-ascend/pull/1268)" -#: ../../user_guide/release_notes.md:54 +#: ../../source/user_guide/release_notes.md:1134 msgid "" -"Improved performance for DeepSeek with DBO enabled. [#997](https://github." -"com/vllm-project/vllm-ascend/pull/997), [#1135](https://github.com/vllm-" -"project/vllm-ascend/pull/1135)" +"Fixed log2phy NoneType bug with static EPLB feature. " +"[#1186](https://github.com/vllm-project/vllm-ascend/pull/1186)" msgstr "" -"启用 DBO 后,DeepSeek 的性能得到提升。[#997](https://github.com/vllm-" -"project/vllm-ascend/pull/997),[#1135](https://github.com/vllm-project/vllm-" -"ascend/pull/1135)" +"修复了启用静态 EPLB 功能时出现的 log2phy NoneType 错误。[#1186](https://github.com/vllm-project/vllm-ascend/pull/1186)" -#: ../../user_guide/release_notes.md:55 +#: ../../source/user_guide/release_notes.md:1135 +msgid "" +"Improved performance for DeepSeek with DBO enabled. " +"[#997](https://github.com/vllm-project/vllm-ascend/pull/997), " +"[#1135](https://github.com/vllm-project/vllm-ascend/pull/1135)" +msgstr "" +"启用 DBO 后,DeepSeek 的性能得到提升。[#997](https://github.com/vllm-project/vllm-ascend/pull/997), [#1135](https://github.com/vllm-project/vllm-ascend/pull/1135)" + +#: ../../source/user_guide/release_notes.md:1136 msgid "" "Refactoring AscendFusedMoE [#1229](https://github.com/vllm-project/vllm-" "ascend/pull/1229)" msgstr "" -"重构 AscendFusedMoE [#1229](https://github.com/vllm-project/vllm-ascend/" -"pull/1229)" +"重构 AscendFusedMoE [#1229](https://github.com/vllm-project/vllm-ascend/pull/1229)" -#: ../../user_guide/release_notes.md:56 +#: ../../source/user_guide/release_notes.md:1137 msgid "" -"Add initial user stories page (include LLaMA-Factory/TRL/verl/MindIE Turbo/" -"GPUStack) [#1224](https://github.com/vllm-project/vllm-ascend/pull/1224)" +"Add initial user stories page (include LLaMA-Factory/TRL/verl/MindIE " +"Turbo/GPUStack) [#1224](https://github.com/vllm-project/vllm-" +"ascend/pull/1224)" msgstr "" -"新增初始用户故事页面(包括 LLaMA-Factory/TRL/verl/MindIE Turbo/GPUStack)" -"[#1224](https://github.com/vllm-project/vllm-ascend/pull/1224)" +"新增初始用户案例页面(包含 LLaMA-Factory/TRL/verl/MindIE Turbo/GPUStack)[#1224](https://github.com/vllm-project/vllm-ascend/pull/1224)" -#: ../../user_guide/release_notes.md:57 +#: ../../source/user_guide/release_notes.md:1138 msgid "" -"Add unit test framework [#1201](https://github.com/vllm-project/vllm-ascend/" -"pull/1201)" -msgstr "" -"添加单元测试框架 [#1201](https://github.com/vllm-project/vllm-ascend/" -"pull/1201)" +"Add unit test framework [#1201](https://github.com/vllm-project/vllm-" +"ascend/pull/1201)" +msgstr "添加单元测试框架 [#1201](https://github.com/vllm-project/vllm-ascend/pull/1201)" -#: ../../user_guide/release_notes.md:59 -msgid "Known Issues" -msgstr "已知问题" - -#: ../../user_guide/release_notes.md:60 +#: ../../source/user_guide/release_notes.md:1142 msgid "" "In some cases, the vLLM process may crash with a **GatherV3** error when " "**aclgraph** is enabled. We are working on this issue and will fix it in " -"the next release. [#1038](https://github.com/vllm-project/vllm-ascend/" -"issues/1038)" +"the next release. 
[#1038](https://github.com/vllm-project/vllm-" +"ascend/issues/1038)" msgstr "" -"在某些情况下,当启用 **aclgraph** 时,vLLM 进程可能会因 **GatherV3** 错误而" -"崩溃。我们正在解决此问题,并将在下一个版本中修复。[#1038](https://github." -"com/vllm-project/vllm-ascend/issues/1038)" +"在某些情况下,当启用 **aclgraph** 时,vLLM 进程可能会因 **GatherV3** " +"错误而崩溃。我们正在解决此问题,并将在下一个版本中修复。[#1038](https://github.com/vllm-project/vllm-" +"ascend/issues/1038)" -#: ../../user_guide/release_notes.md:61 +#: ../../source/user_guide/release_notes.md:1143 msgid "" "Prefix cache feature does not work with the Ascend Scheduler but without " -"chunked prefill enabled. This will be fixed in the next release. [#1350]" -"(https://github.com/vllm-project/vllm-ascend/issues/1350)" +"chunked prefill enabled. This will be fixed in the next release. " +"[#1350](https://github.com/vllm-project/vllm-ascend/issues/1350)" msgstr "" -"前缀缓存功能在未启用分块预填充的情况下无法与 Ascend 调度器一同工作。此问题将" -"在下一个版本中修复。[#1350](https://github.com/vllm-project/vllm-ascend/" -"issues/1350)" +"前缀缓存功能在未启用分块预填充的情况下无法与 Ascend " +"调度器一同工作。此问题将在下一个版本中修复。[#1350](https://github.com/vllm-project/vllm-" +"ascend/issues/1350)" -#: ../../user_guide/release_notes.md:63 +#: ../../source/user_guide/release_notes.md:1145 msgid "Full Changelog" msgstr "完整更新日志" -#: ../../user_guide/release_notes.md:64 +#: ../../source/user_guide/release_notes.md:1147 msgid "" -"https://github.com/vllm-project/vllm-ascend/compare/v0.9.0rc2...v0.9.1rc1" +"" msgstr "" -"https://github.com/vllm-project/vllm-ascend/compare/v0.9.0rc2...v0.9.1rc1" +"" -#: ../../user_guide/release_notes.md:66 +#: ../../source/user_guide/release_notes.md:1151 +msgid "" +"@farawayboat made their first contribution in " +msgstr "" +"@farawayboat 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1152 +msgid "" +"@yzim made their first contribution in " +msgstr "" +"@yzim 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1153 +msgid "" +"@chenwaner made their first contribution in " +msgstr "" +"@chenwaner 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1154 +msgid "" +"@wangyanhui-cmss made their first contribution in " +msgstr "" +"@wangyanhui-cmss 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1155 +msgid "" +"@songshanhu07 made their first contribution in " +msgstr "" +"@songshanhu07 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1156 +msgid "" +"@yuancaoyaoHW made their first contribution in " +msgstr "" +"@yuancaoyaoHW 在 中完成了首次贡献" + +#: ../../source/user_guide/release_notes.md:1158 +msgid "" +"**Full Changelog**: " +msgstr "" +"**完整更新日志**: " + +#: ../../source/user_guide/release_notes.md:1160 msgid "v0.9.0rc2 - 2025.06.10" msgstr "v0.9.0rc2 - 2025.06.10" -#: ../../user_guide/release_notes.md:68 +#: ../../source/user_guide/release_notes.md:1162 msgid "" "This release contains some quick fixes for v0.9.0rc1. Please use this " "release instead of v0.9.0rc1." -msgstr "" -"本次发布包含了一些针对 v0.9.0rc1 的快速修复。请使用本次发布版本,而不是 " -"v0.9.0rc1。" +msgstr "本次发布包含了一些针对 v0.9.0rc1 的快速修复。请使用本次发布版本,而不是 v0.9.0rc1。" -#: ../../user_guide/release_notes.md:72 +#: ../../source/user_guide/release_notes.md:1166 msgid "" "Fix the import error when vllm-ascend is installed without editable way. " "[#1152](https://github.com/vllm-project/vllm-ascend/pull/1152)" msgstr "" -"修复当以非可编辑方式安装 vllm-ascend 时的导入错误。[#1152](https://github." 
-"com/vllm-project/vllm-ascend/pull/1152)" +"修复当以非可编辑方式安装 vllm-ascend 时的导入错误。[#1152](https://github.com/vllm-project" +"/vllm-ascend/pull/1152)" -#: ../../user_guide/release_notes.md:74 +#: ../../source/user_guide/release_notes.md:1168 msgid "v0.9.0rc1 - 2025.06.09" msgstr "v0.9.0rc1 - 2025.06.09" -#: ../../user_guide/release_notes.md:76 +#: ../../source/user_guide/release_notes.md:1170 msgid "" -"This is the 1st release candidate of v0.9.0 for vllm-ascend. Please follow " -"the [official doc](https://vllm-ascend.readthedocs.io/en/) to start the " -"journey. From this release, V1 Engine is recommended to use. The code of V0 " -"Engine is frozen and will not be maintained any more. Please set " -"environment `VLLM_USE_V1=1` to enable V1 Engine." +"This is the 1st release candidate of v0.9.0 for vllm-ascend. Please " +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.9.0rc1) to start the journey. From this release, V1 Engine" +" is recommended to use. The code of V0 Engine is frozen and will not be " +"maintained any more. Please set environment `VLLM_USE_V1=1` to enable V1 " +"Engine." msgstr "" -"这是 vllm-ascend v0.9.0 的第一个候选发布版本。请按照[官方文档](https://vllm-" -"ascend.readthedocs.io/en/)开始使用。从此版本起,推荐使用 V1 引擎。V0 引擎的" -"代码已被冻结,不再维护。如需启用 V1 引擎,请设置环境变量 `VLLM_USE_V1=1`。" +"这是 vllm-ascend v0.9.0 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.9.0rc1)开始使用。从此版本起,推荐使用 V1 引擎。V0 引擎的代码已被冻结,不再维护。如需启用 V1" +" 引擎,请设置环境变量 `VLLM_USE_V1=1`。" -#: ../../user_guide/release_notes.md:80 +#: ../../source/user_guide/release_notes.md:1174 msgid "" -"DeepSeek works with graph mode now. Follow the [official doc](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/feature_guide/graph_mode.html) " -"to take a try. [#789](https://github.com/vllm-project/vllm-ascend/pull/789)" +"DeepSeek works with graph mode now. Follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html)" +" to take a try. [#789](https://github.com/vllm-project/vllm-" +"ascend/pull/789)" msgstr "" -"DeepSeek 现在已支持图模式。请按照[官方文档](https://vllm-ascend.readthedocs." -"io/en/latest/user_guide/feature_guide/graph_mode.html)进行尝试。[#789]" -"(https://github.com/vllm-project/vllm-ascend/pull/789)" +"DeepSeek 现在已支持图模式。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html)进行尝试。[#789](https://github.com" +"/vllm-project/vllm-ascend/pull/789)" -#: ../../user_guide/release_notes.md:81 +#: ../../source/user_guide/release_notes.md:1175 msgid "" -"Qwen series models works with graph mode now. It works by default with V1 " -"Engine. Please note that in this release, only Qwen series models are well " -"tested with graph mode. We'll make it stable and generalize in the next " -"release. If you hit any issues, please feel free to open an issue on GitHub " -"and fallback to eager mode temporarily by set `enforce_eager=True` when " -"initializing the model." +"Qwen series models work with graph mode now. It works by default with V1 " +"Engine. Please note that in this release, only Qwen series models are " +"well tested with graph mode. We'll make it stable and generalize in the " +"next release. If you hit any issues, please feel free to open an issue on" +" GitHub and fallback to eager mode temporarily by set " +"`enforce_eager=True` when initializing the model." 
msgstr "" -"Qwen 系列模型现在支持图模式。默认情况下,它在 V1 引擎下运行。请注意,本次发" -"布中,仅 Qwen 系列模型经过了充分的图模式测试。我们将在下一个版本中进一步提升" -"其稳定性并推广至更广泛的场景。如果你遇到任何问题,请随时在 GitHub 上提交 " +"Qwen 系列模型现在支持图模式。默认情况下,它在 V1 引擎下运行。请注意,本次发布中,仅 Qwen " +"系列模型经过了充分的图模式测试。我们将在下一个版本中进一步提升其稳定性并推广至更广泛的场景。如果你遇到任何问题,请随时在 GitHub 上提交 " "issue,并在初始化模型时通过设置 `enforce_eager=True` 临时切换回 eager 模式。" -#: ../../user_guide/release_notes.md:85 +#: ../../source/user_guide/release_notes.md:1179 msgid "" -"The performance of multi-step scheduler has been improved. Thanks for the " -"contribution from China Merchants Bank. [#814](https://github.com/vllm-" +"The performance of multi-step scheduler has been improved. Thanks for the" +" contribution from China Merchants Bank. [#814](https://github.com/vllm-" "project/vllm-ascend/pull/814)" msgstr "" -"多步调度器的性能得到了提升。感谢招商银行的贡献。[#814](https://github.com/" -"vllm-project/vllm-ascend/pull/814)" +"多步调度器的性能得到了提升。感谢招商银行的贡献。[#814](https://github.com/vllm-project/vllm-" +"ascend/pull/814)" -#: ../../user_guide/release_notes.md:86 +#: ../../source/user_guide/release_notes.md:1180 msgid "" -"LoRA、Multi-LoRA And Dynamic Serving is supported for V1 Engine now. Thanks " -"for the contribution from China Merchants Bank. [#893](https://github.com/" -"vllm-project/vllm-ascend/pull/893)" +"LoRA、Multi-LoRA And Dynamic Serving is supported for V1 Engine now. " +"Thanks for the contribution from China Merchants Bank. " +"[#893](https://github.com/vllm-project/vllm-ascend/pull/893)" msgstr "" -"V1 引擎现在支持 LoRA、多 LoRA 以及动态服务。感谢招商银行的贡献。[#893]" -"(https://github.com/vllm-project/vllm-ascend/pull/893)" +"V1 引擎现在支持 LoRA、多 LoRA 以及动态服务。感谢招商银行的贡献。[#893](https://github.com/vllm-" +"project/vllm-ascend/pull/893)" -#: ../../user_guide/release_notes.md:87 +#: ../../source/user_guide/release_notes.md:1181 msgid "" -"Prefix cache and chunked prefill feature works now [#782](https://github." -"com/vllm-project/vllm-ascend/pull/782) [#844](https://github.com/vllm-" -"project/vllm-ascend/pull/844)" +"Prefix cache and chunked prefill feature works now " +"[#782](https://github.com/vllm-project/vllm-ascend/pull/782) " +"[#844](https://github.com/vllm-project/vllm-ascend/pull/844)" msgstr "" -"前缀缓存和分块预填充功能现已可用 [#782](https://github.com/vllm-project/" -"vllm-ascend/pull/782) [#844](https://github.com/vllm-project/vllm-ascend/" -"pull/844)" +"前缀缓存和分块预填充功能现已可用 [#782](https://github.com/vllm-project/vllm-" +"ascend/pull/782) [#844](https://github.com/vllm-project/vllm-" +"ascend/pull/844)" -#: ../../user_guide/release_notes.md:88 +#: ../../source/user_guide/release_notes.md:1182 msgid "" -"Spec decode and MTP features work with V1 Engine now. [#874](https://github." -"com/vllm-project/vllm-ascend/pull/874) [#890](https://github.com/vllm-" -"project/vllm-ascend/pull/890)" +"Spec decode and MTP features work with V1 Engine now. " +"[#874](https://github.com/vllm-project/vllm-ascend/pull/874) " +"[#890](https://github.com/vllm-project/vllm-ascend/pull/890)" msgstr "" -"Spec 解码和 MTP 功能现在已经支持 V1 引擎。[#874](https://github.com/vllm-" -"project/vllm-ascend/pull/874) [#890](https://github.com/vllm-project/vllm-" +"Spec 解码和 MTP 功能现在已经支持 V1 引擎。[#874](https://github.com/vllm-project/vllm-" +"ascend/pull/874) [#890](https://github.com/vllm-project/vllm-" "ascend/pull/890)" -#: ../../user_guide/release_notes.md:89 +#: ../../source/user_guide/release_notes.md:1183 msgid "" -"DP feature works with DeepSeek now. 
[#1012](https://github.com/vllm-project/" -"vllm-ascend/pull/1012)" -msgstr "" -"DP 功能现在可以与 DeepSeek 一起使用。[#1012](https://github.com/vllm-" +"DP feature works with DeepSeek now. [#1012](https://github.com/vllm-" "project/vllm-ascend/pull/1012)" +msgstr "" +"DP 功能现在可以与 DeepSeek 一起使用。[#1012](https://github.com/vllm-project/vllm-" +"ascend/pull/1012)" -#: ../../user_guide/release_notes.md:90 +#: ../../source/user_guide/release_notes.md:1184 msgid "" -"Input embedding feature works with V0 Engine now. [#916](https://github.com/" -"vllm-project/vllm-ascend/pull/916)" +"Input embedding feature works with V0 Engine now. " +"[#916](https://github.com/vllm-project/vllm-ascend/pull/916)" msgstr "" "输入嵌入特性现在已支持 V0 引擎。[#916](https://github.com/vllm-project/vllm-" "ascend/pull/916)" -#: ../../user_guide/release_notes.md:91 +#: ../../source/user_guide/release_notes.md:1185 msgid "" -"Sleep mode feature works with V1 Engine now. [#1084](https://github.com/" -"vllm-project/vllm-ascend/pull/1084)" +"Sleep mode feature works with V1 Engine now. [#1084](https://github.com" +"/vllm-project/vllm-ascend/pull/1084)" msgstr "" -"休眠模式功能现在已支持 V1 引擎。[#1084](https://github.com/vllm-project/" -"vllm-ascend/pull/1084)" +"休眠模式功能现在已支持 V1 引擎。[#1084](https://github.com/vllm-project/vllm-" +"ascend/pull/1084)" -#: ../../user_guide/release_notes.md:93 ../../user_guide/release_notes.md:149 -#: ../../user_guide/release_notes.md:239 ../../user_guide/release_notes.md:262 -msgid "Model" +#: ../../source/user_guide/release_notes.md:1187 +#: ../../source/user_guide/release_notes.md:1245 +#: ../../source/user_guide/release_notes.md:1346 +#: ../../source/user_guide/release_notes.md:1373 +msgid "Models" msgstr "模型" -#: ../../user_guide/release_notes.md:95 +#: ../../source/user_guide/release_notes.md:1189 msgid "" -"Qwen2.5 VL works with V1 Engine now. [#736](https://github.com/vllm-project/" -"vllm-ascend/pull/736)" -msgstr "" -"Qwen2.5 VL 现在可以与 V1 引擎协同工作。[#736](https://github.com/vllm-" +"Qwen2.5 VL works with V1 Engine now. [#736](https://github.com/vllm-" "project/vllm-ascend/pull/736)" - -#: ../../user_guide/release_notes.md:96 -msgid "" -"LLama4 works now. [#740](https://github.com/vllm-project/vllm-ascend/" -"pull/740)" msgstr "" -"LLama4 现在可以使用了。[#740](https://github.com/vllm-project/vllm-ascend/" -"pull/740)" +"Qwen2.5 VL 现在可以与 V1 引擎协同工作。[#736](https://github.com/vllm-project/vllm-" +"ascend/pull/736)" -#: ../../user_guide/release_notes.md:97 +#: ../../source/user_guide/release_notes.md:1190 +msgid "" +"Llama4 works now. [#740](https://github.com/vllm-project/vllm-" +"ascend/pull/740)" +msgstr "" +"Llama4 现在可以使用了。[#740](https://github.com/vllm-project/vllm-" +"ascend/pull/740)" + +#: ../../source/user_guide/release_notes.md:1191 msgid "" "A new kind of DeepSeek model called dual-batch overlap(DBO) is added. " -"Please set `VLLM_ASCEND_ENABLE_DBO=1` to use it. [#941](https://github.com/" -"vllm-project/vllm-ascend/pull/941)" +"Please set `VLLM_ASCEND_ENABLE_DBO=1` to use it. " +"[#941](https://github.com/vllm-project/vllm-ascend/pull/941)" msgstr "" "新增了一种名为双批次重叠(dual-batch overlap,DBO)的 DeepSeek 模型。请设置 " -"`VLLM_ASCEND_ENABLE_DBO=1` 以启用。 [#941](https://github.com/vllm-project/" -"vllm-ascend/pull/941)" +"`VLLM_ASCEND_ENABLE_DBO=1` 以启用。 [#941](https://github.com/vllm-project" +"/vllm-ascend/pull/941)" -#: ../../user_guide/release_notes.md:101 +#: ../../source/user_guide/release_notes.md:1195 msgid "" -"online serve with ascend quantization works now. 
[#877](https://github.com/" -"vllm-project/vllm-ascend/pull/877)" +"online serve with ascend quantization works now. " +"[#877](https://github.com/vllm-project/vllm-ascend/pull/877)" msgstr "" -"在线服务现已支持Ascend量化。[#877](https://github.com/vllm-project/vllm-" +"在线服务现已支持 Ascend 量化。[#877](https://github.com/vllm-project/vllm-" "ascend/pull/877)" -#: ../../user_guide/release_notes.md:102 +#: ../../source/user_guide/release_notes.md:1196 msgid "" -"A batch of bugs for graph mode and moe model have been fixed. [#773]" -"(https://github.com/vllm-project/vllm-ascend/pull/773) [#771](https://" -"github.com/vllm-project/vllm-ascend/pull/771) [#774](https://github.com/" -"vllm-project/vllm-ascend/pull/774) [#816](https://github.com/vllm-project/" -"vllm-ascend/pull/816) [#817](https://github.com/vllm-project/vllm-ascend/" -"pull/817) [#819](https://github.com/vllm-project/vllm-ascend/pull/819) " -"[#912](https://github.com/vllm-project/vllm-ascend/pull/912) [#897](https://" -"github.com/vllm-project/vllm-ascend/pull/897) [#961](https://github.com/" -"vllm-project/vllm-ascend/pull/961) [#958](https://github.com/vllm-project/" -"vllm-ascend/pull/958) [#913](https://github.com/vllm-project/vllm-ascend/" -"pull/913) [#905](https://github.com/vllm-project/vllm-ascend/pull/905)" +"A batch of bugs for graph mode and moe model have been fixed. " +"[#773](https://github.com/vllm-project/vllm-ascend/pull/773) " +"[#771](https://github.com/vllm-project/vllm-ascend/pull/771) " +"[#774](https://github.com/vllm-project/vllm-ascend/pull/774) " +"[#816](https://github.com/vllm-project/vllm-ascend/pull/816) " +"[#817](https://github.com/vllm-project/vllm-ascend/pull/817) " +"[#819](https://github.com/vllm-project/vllm-ascend/pull/819) " +"[#912](https://github.com/vllm-project/vllm-ascend/pull/912) " +"[#897](https://github.com/vllm-project/vllm-ascend/pull/897) " +"[#961](https://github.com/vllm-project/vllm-ascend/pull/961) " +"[#958](https://github.com/vllm-project/vllm-ascend/pull/958) " +"[#913](https://github.com/vllm-project/vllm-ascend/pull/913) " +"[#905](https://github.com/vllm-project/vllm-ascend/pull/905)" msgstr "" -"已修复一批关于图模式和moe模型的bug。[#773](https://github.com/vllm-project/" -"vllm-ascend/pull/773) [#771](https://github.com/vllm-project/vllm-ascend/" -"pull/771) [#774](https://github.com/vllm-project/vllm-ascend/pull/774) " -"[#816](https://github.com/vllm-project/vllm-ascend/pull/816) [#817](https://" -"github.com/vllm-project/vllm-ascend/pull/817) [#819](https://github.com/" -"vllm-project/vllm-ascend/pull/819) [#912](https://github.com/vllm-project/" -"vllm-ascend/pull/912) [#897](https://github.com/vllm-project/vllm-ascend/" -"pull/897) [#961](https://github.com/vllm-project/vllm-ascend/pull/961) " -"[#958](https://github.com/vllm-project/vllm-ascend/pull/958) [#913](https://" -"github.com/vllm-project/vllm-ascend/pull/913) [#905](https://github.com/" -"vllm-project/vllm-ascend/pull/905)" +"已修复一批关于图模式和 MoE 模型的 bug。[#773](https://github.com/vllm-project/vllm-" +"ascend/pull/773) [#771](https://github.com/vllm-project/vllm-" +"ascend/pull/771) [#774](https://github.com/vllm-project/vllm-" +"ascend/pull/774) [#816](https://github.com/vllm-project/vllm-" +"ascend/pull/816) [#817](https://github.com/vllm-project/vllm-" +"ascend/pull/817) [#819](https://github.com/vllm-project/vllm-" +"ascend/pull/819) [#912](https://github.com/vllm-project/vllm-" +"ascend/pull/912) [#897](https://github.com/vllm-project/vllm-" +"ascend/pull/897) [#961](https://github.com/vllm-project/vllm-" +"ascend/pull/961) 
[#958](https://github.com/vllm-project/vllm-" +"ascend/pull/958) [#913](https://github.com/vllm-project/vllm-" +"ascend/pull/913) [#905](https://github.com/vllm-project/vllm-" +"ascend/pull/905)" -#: ../../user_guide/release_notes.md:103 +#: ../../source/user_guide/release_notes.md:1197 msgid "" -"A batch of performance improvement PRs have been merged. [#784](https://" -"github.com/vllm-project/vllm-ascend/pull/784) [#803](https://github.com/" -"vllm-project/vllm-ascend/pull/803) [#966](https://github.com/vllm-project/" -"vllm-ascend/pull/966) [#839](https://github.com/vllm-project/vllm-ascend/" -"pull/839) [#970](https://github.com/vllm-project/vllm-ascend/pull/970) " -"[#947](https://github.com/vllm-project/vllm-ascend/pull/947) [#987](https://" -"github.com/vllm-project/vllm-ascend/pull/987) [#1085](https://github.com/" -"vllm-project/vllm-ascend/pull/1085)" +"A batch of performance improvement PRs have been merged. " +"[#784](https://github.com/vllm-project/vllm-ascend/pull/784) " +"[#803](https://github.com/vllm-project/vllm-ascend/pull/803) " +"[#966](https://github.com/vllm-project/vllm-ascend/pull/966) " +"[#839](https://github.com/vllm-project/vllm-ascend/pull/839) " +"[#970](https://github.com/vllm-project/vllm-ascend/pull/970) " +"[#947](https://github.com/vllm-project/vllm-ascend/pull/947) " +"[#987](https://github.com/vllm-project/vllm-ascend/pull/987) " +"[#1085](https://github.com/vllm-project/vllm-ascend/pull/1085)" msgstr "" "一批性能改进的 PR 已被合并。[#784](https://github.com/vllm-project/vllm-" -"ascend/pull/784) [#803](https://github.com/vllm-project/vllm-ascend/" -"pull/803) [#966](https://github.com/vllm-project/vllm-ascend/pull/966) " -"[#839](https://github.com/vllm-project/vllm-ascend/pull/839) [#970](https://" -"github.com/vllm-project/vllm-ascend/pull/970) [#947](https://github.com/" -"vllm-project/vllm-ascend/pull/947) [#987](https://github.com/vllm-project/" -"vllm-ascend/pull/987) [#1085](https://github.com/vllm-project/vllm-ascend/" -"pull/1085)" +"ascend/pull/784) [#803](https://github.com/vllm-project/vllm-" +"ascend/pull/803) [#966](https://github.com/vllm-project/vllm-" +"ascend/pull/966) [#839](https://github.com/vllm-project/vllm-" +"ascend/pull/839) [#970](https://github.com/vllm-project/vllm-" +"ascend/pull/970) [#947](https://github.com/vllm-project/vllm-" +"ascend/pull/947) [#987](https://github.com/vllm-project/vllm-" +"ascend/pull/987) [#1085](https://github.com/vllm-project/vllm-" +"ascend/pull/1085)" -#: ../../user_guide/release_notes.md:104 +#: ../../source/user_guide/release_notes.md:1198 msgid "" -"From this release, binary wheel package will be released as well. [#775]" -"(https://github.com/vllm-project/vllm-ascend/pull/775)" +"From this release, binary wheel package will be released as well. 
" +"[#775](https://github.com/vllm-project/vllm-ascend/pull/775)" msgstr "" -"从本版本开始,将同时发布二进制 wheel 包。[#775](https://github.com/vllm-" -"project/vllm-ascend/pull/775)" +"从本版本开始,将同时发布二进制 wheel 包。[#775](https://github.com/vllm-project/vllm-" +"ascend/pull/775)" -#: ../../user_guide/release_notes.md:105 +#: ../../source/user_guide/release_notes.md:1199 msgid "" -"The contributor doc site is [added](https://vllm-ascend.readthedocs.io/en/" -"latest/community/contributors.html)" +"The contributor doc site is " +"[added](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html)" msgstr "" -"贡献者文档站点已[添加](https://vllm-ascend.readthedocs.io/en/latest/" -"community/contributors.html)" +"贡献者文档站点已[添加](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html)" -#: ../../user_guide/release_notes.md:107 -msgid "Known Issue" -msgstr "已知问题" - -#: ../../user_guide/release_notes.md:109 +#: ../../source/user_guide/release_notes.md:1203 msgid "" "In some case, vLLM process may be crashed with aclgraph enabled. We're " "working this issue and it'll be fixed in the next release." -msgstr "" -"在某些情况下,启用 aclgraph 时 vLLM 进程可能会崩溃。我们正在处理这个问题,并" -"将在下一个版本中修复。" +msgstr "在某些情况下,启用 aclgraph 时 vLLM 进程可能会崩溃。我们正在处理这个问题,并将在下一个版本中修复。" -#: ../../user_guide/release_notes.md:110 +#: ../../source/user_guide/release_notes.md:1204 msgid "" "Multi node data-parallel doesn't work with this release. This is a known " -"issue in vllm and has been fixed on main branch. [#18981](https://github." -"com/vllm-project/vllm/pull/18981)" +"issue in vllm and has been fixed on main branch. " +"[#18981](https://github.com/vllm-project/vllm/pull/18981)" msgstr "" -"多节点数据并行在此版本中无法使用。这是 vllm 中已知的问题,并已在主分支中修" -"复。 [#18981](https://github.com/vllm-project/vllm/pull/18981)" +"多节点数据并行在此版本中无法使用。这是 vllm 中已知的问题,并已在主分支中修复。 [#18981](https://github.com" +"/vllm-project/vllm/pull/18981)" -#: ../../user_guide/release_notes.md:112 +#: ../../source/user_guide/release_notes.md:1206 msgid "v0.7.3.post1 - 2025.05.29" msgstr "v0.7.3.post1 - 2025.05.29" -#: ../../user_guide/release_notes.md:114 +#: ../../source/user_guide/release_notes.md:1208 msgid "" -"This is the first post release of 0.7.3. Please follow the [official doc]" -"(https://vllm-ascend.readthedocs.io/en/v0.7.3-dev) to start the journey. It " -"includes the following changes:" +"This is the first post release of 0.7.3. Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3) to start the " +"journey. It includes the following changes:" msgstr "" -"这是 0.7.3 的第一个补丁发布。请按照[官方文档](https://vllm-ascend." -"readthedocs.io/en/v0.7.3-dev)开始使用。本次更新包括以下更改:" +"这是 0.7.3 版本的第一个补丁发布。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。本次更新包含以下更改:" -#: ../../user_guide/release_notes.md:118 +#: ../../source/user_guide/release_notes.md:1212 msgid "" -"Qwen3 and Qwen3MOE is supported now. The performance and accuracy of Qwen3 " -"is well tested. You can try it now. Mindie Turbo is recommended to improve " -"the performance of Qwen3. [#903](https://github.com/vllm-project/vllm-" -"ascend/pull/903) [#915](https://github.com/vllm-project/vllm-ascend/" -"pull/915)" +"Qwen3 and Qwen3MOE is supported now. The performance and accuracy of " +"Qwen3 is well tested. You can try it now. Mindie Turbo is recommended to " +"improve the performance of Qwen3. 
[#903](https://github.com/vllm-project" +"/vllm-ascend/pull/903) [#915](https://github.com/vllm-project/vllm-" +"ascend/pull/915)" msgstr "" -"现在已支持 Qwen3 和 Qwen3MOE。Qwen3 的性能和精度已经过充分测试,你可以立即试" -"用。推荐使用 Mindie Turbo 以提升 Qwen3 的性能。[#903](https://github.com/" -"vllm-project/vllm-ascend/pull/903) [#915](https://github.com/vllm-project/" -"vllm-ascend/pull/915)" +"现已支持 Qwen3 和 Qwen3MOE。Qwen3 的性能和精度已通过充分测试,您可以立即试用。推荐使用 Mindie Turbo 以提升 Qwen3 的性能。[#903](https://github.com/vllm-project/vllm-ascend/pull/903) [#915](https://github.com/vllm-project/vllm-ascend/pull/915)" -#: ../../user_guide/release_notes.md:119 +#: ../../source/user_guide/release_notes.md:1213 msgid "" -"Added a new performance guide. The guide aims to help users to improve vllm-" -"ascend performance on system level. It includes OS configuration, library " -"optimization, deploy guide and so on. [#878](https://github.com/vllm-" -"project/vllm-ascend/pull/878) [Doc Link](https://vllm-ascend.readthedocs.io/" -"en/v0.7.3-dev/developer_guide/performance/optimization_and_tuning.html)" +"Added a new performance guide. The guide aims to help users to improve " +"vllm-ascend performance on system level. It includes OS configuration, " +"library optimization, deploy guide and so on. [#878](https://github.com" +"/vllm-project/vllm-ascend/pull/878) [Doc " +"Link](https://docs.vllm.ai/projects/ascend/en/v0.7.3/developer_guide/performance/optimization_and_tuning.html)" msgstr "" -"新增了一个性能指南。该指南旨在帮助用户在系统层面提升 vllm-ascend 的性能。内" -"容包括操作系统配置、库优化、部署指南等。 [#878](https://github.com/vllm-" -"project/vllm-ascend/pull/878) [文档链接](https://vllm-ascend.readthedocs.io/" -"en/v0.7.3-dev/developer_guide/performance/optimization_and_tuning.html)" +"新增了一份性能指南。该指南旨在帮助用户在系统层面提升 vllm-ascend 的性能,内容包括操作系统配置、库优化、部署指南等。[#878](https://github.com/vllm-project/vllm-ascend/pull/878) [文档链接](https://docs.vllm.ai/projects/ascend/en/v0.7.3/developer_guide/performance/optimization_and_tuning.html)" -#: ../../user_guide/release_notes.md:121 -msgid "Bug Fix" -msgstr "漏洞修复" +#: ../../source/user_guide/release_notes.md:1215 +msgid "Bug Fixes" +msgstr "错误修复" -#: ../../user_guide/release_notes.md:123 +#: ../../source/user_guide/release_notes.md:1217 msgid "" "Qwen2.5-VL works for RLHF scenarios now. [#928](https://github.com/vllm-" "project/vllm-ascend/pull/928)" msgstr "" -"Qwen2.5-VL 现在已支持 RLHF 场景。[#928](https://github.com/vllm-project/" -"vllm-ascend/pull/928)" +"Qwen2.5-VL 现已支持 RLHF 场景。[#928](https://github.com/vllm-project/vllm-ascend/pull/928)" -#: ../../user_guide/release_notes.md:124 +#: ../../source/user_guide/release_notes.md:1218 msgid "" -"Users can launch the model from online weights now. e.g. from huggingface " -"or modelscope directly [#858](https://github.com/vllm-project/vllm-ascend/" -"pull/858) [#918](https://github.com/vllm-project/vllm-ascend/pull/918)" +"Users can launch the model from online weights now. e.g. 
from huggingface" +" or modelscope directly [#858](https://github.com/vllm-project/vllm-" +"ascend/pull/858) [#918](https://github.com/vllm-project/vllm-" +"ascend/pull/918)" msgstr "" -"用户现在可以直接从在线权重启动模型。例如,可以直接从 huggingface 或 " -"modelscope 获取。[#858](https://github.com/vllm-project/vllm-ascend/" -"pull/858) [#918](https://github.com/vllm-project/vllm-ascend/pull/918)" +"用户现在可以直接从在线权重启动模型,例如直接从 huggingface 或 modelscope 获取。[#858](https://github.com/vllm-project/vllm-ascend/pull/858) [#918](https://github.com/vllm-project/vllm-ascend/pull/918)" -#: ../../user_guide/release_notes.md:125 +#: ../../source/user_guide/release_notes.md:1219 msgid "" -"The meaningless log info `UserWorkspaceSize0` has been cleaned. [#911]" -"(https://github.com/vllm-project/vllm-ascend/pull/911)" +"The meaningless log info `UserWorkspaceSize0` has been cleaned. " +"[#911](https://github.com/vllm-project/vllm-ascend/pull/911)" msgstr "" -"无意义的日志信息 `UserWorkspaceSize0` 已被清理。[#911](https://github.com/" -"vllm-project/vllm-ascend/pull/911)" +"无意义的日志信息 `UserWorkspaceSize0` 已被清理。[#911](https://github.com/vllm-project/vllm-ascend/pull/911)" -#: ../../user_guide/release_notes.md:126 +#: ../../source/user_guide/release_notes.md:1220 msgid "" "The log level for `Failed to import vllm_ascend_C` has been changed to " -"`warning` instead of `error`. [#956](https://github.com/vllm-project/vllm-" -"ascend/pull/956)" +"`warning` instead of `error`. [#956](https://github.com/vllm-project" +"/vllm-ascend/pull/956)" msgstr "" -"`Failed to import vllm_ascend_C` 的日志级别已从 `error` 更改为 `warning`。" -"[#956](https://github.com/vllm-project/vllm-ascend/pull/956)" +"`Failed to import vllm_ascend_C` 的日志级别已从 `error` 更改为 `warning`。[#956](https://github.com/vllm-project/vllm-ascend/pull/956)" -#: ../../user_guide/release_notes.md:127 +#: ../../source/user_guide/release_notes.md:1221 msgid "" -"DeepSeek MLA now works with chunked prefill in V1 Engine. Please note that " -"V1 engine in 0.7.3 is just expermential and only for test usage. [#849]" -"(https://github.com/vllm-project/vllm-ascend/pull/849) [#936](https://" -"github.com/vllm-project/vllm-ascend/pull/936)" +"DeepSeek MLA now works with chunked prefill in V1 Engine. Please note " +"that V1 engine in 0.7.3 is just expermential and only for test usage. 
" +"[#849](https://github.com/vllm-project/vllm-ascend/pull/849) " +"[#936](https://github.com/vllm-project/vllm-ascend/pull/936)" msgstr "" -"DeepSeek MLA 现已在 V1 引擎中支持分块预填充。请注意,0.7.3 版本中的 V1 引擎" -"仅为实验性,仅供测试使用。[#849](https://github.com/vllm-project/vllm-" -"ascend/pull/849) [#936](https://github.com/vllm-project/vllm-ascend/" -"pull/936)" +"DeepSeek MLA 现已在 V1 引擎中支持分块预填充。请注意,0.7.3 版本中的 V1 引擎仅为实验性功能,仅供测试使用。[#849](https://github.com/vllm-project/vllm-ascend/pull/849) [#936](https://github.com/vllm-project/vllm-ascend/pull/936)" -#: ../../user_guide/release_notes.md:129 -msgid "Docs" -msgstr "文档" - -#: ../../user_guide/release_notes.md:131 +#: ../../source/user_guide/release_notes.md:1225 msgid "" -"The benchmark doc is updated for Qwen2.5 and Qwen2.5-VL [#792](https://" -"github.com/vllm-project/vllm-ascend/pull/792)" +"The benchmark doc is updated for Qwen2.5 and Qwen2.5-VL " +"[#792](https://github.com/vllm-project/vllm-ascend/pull/792)" msgstr "" -"基准文档已针对 Qwen2.5 和 Qwen2.5-VL 更新 [#792](https://github.com/vllm-" -"project/vllm-ascend/pull/792)" +"基准测试文档已针对 Qwen2.5 和 Qwen2.5-VL 进行更新 [#792](https://github.com/vllm-project/vllm-ascend/pull/792)" -#: ../../user_guide/release_notes.md:132 +#: ../../source/user_guide/release_notes.md:1226 msgid "" "Add the note to clear that only \"modelscope<1.23.0\" works with 0.7.3. " "[#954](https://github.com/vllm-project/vllm-ascend/pull/954)" msgstr "" -"添加说明,明确只有 \"modelscope<1.23.0\" 能与 0.7.3 一起使用。[#954]" -"(https://github.com/vllm-project/vllm-ascend/pull/954)" +"添加说明,明确指出只有 \"modelscope<1.23.0\" 可与 0.7.3 版本兼容使用。[#954](https://github.com/vllm-project/vllm-ascend/pull/954)" -#: ../../user_guide/release_notes.md:134 +#: ../../source/user_guide/release_notes.md:1228 msgid "v0.7.3 - 2025.05.08" msgstr "v0.7.3 - 2025.05.08" -#: ../../user_guide/release_notes.md:136 ../../user_guide/release_notes.md:277 +#: ../../source/user_guide/release_notes.md:1230 +#: ../../source/user_guide/release_notes.md:1391 msgid "🎉 Hello, World!" msgstr "🎉 你好,世界!" -#: ../../user_guide/release_notes.md:138 +#: ../../source/user_guide/release_notes.md:1232 msgid "" "We are excited to announce the release of 0.7.3 for vllm-ascend. This is " -"the first official release. The functionality, performance, and stability " -"of this release are fully tested and verified. We encourage you to try it " -"out and provide feedback. We'll post bug fix versions in the future if " -"needed. Please follow the [official doc](https://vllm-ascend.readthedocs.io/" -"en/v0.7.3-dev) to start the journey." +"the first official release. The functionality, performance, and stability" +" of this release are fully tested and verified. We encourage you to try " +"it out and provide feedback. We'll post bug fix versions in the future if" +" needed. Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3) to start the " +"journey." msgstr "" -"我们很高兴地宣布 vllm-ascend 0.7.3 版本正式发布。这是首个正式发布的版本。该" -"版本的功能、性能和稳定性已充分测试和验证。我们鼓励您试用并反馈意见。如有需" -"要,未来我们将发布修复版本。请参阅[官方文档](https://vllm-ascend." 
-"readthedocs.io/en/v0.7.3-dev)开启您的体验之旅。" +"我们很高兴地宣布 vllm-ascend 0.7.3 版本正式发布。这是首个官方正式版本。该版本的功能、性能和稳定性均已通过全面测试和验证。我们鼓励您试用并提供反馈。如有需要,我们将在未来发布错误修复版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。" -#: ../../user_guide/release_notes.md:141 +#: ../../source/user_guide/release_notes.md:1236 msgid "" "This release includes all features landed in the previous release " -"candidates ([v0.7.1rc1](https://github.com/vllm-project/vllm-ascend/" -"releases/tag/v0.7.1rc1), [v0.7.3rc1](https://github.com/vllm-project/vllm-" -"ascend/releases/tag/v0.7.3rc1), [v0.7.3rc2](https://github.com/vllm-project/" -"vllm-ascend/releases/tag/v0.7.3rc2)). And all the features are fully tested " -"and verified. Visit the official doc the get the detail [feature](https://" -"vllm-ascend.readthedocs.io/en/v0.7.3-dev/user_guide/suppoted_features.html) " -"and [model](https://vllm-ascend.readthedocs.io/en/v0.7.3-dev/user_guide/" -"supported_models.html) support matrix." +"candidates ([v0.7.1rc1](https://github.com/vllm-project/vllm-" +"ascend/releases/tag/v0.7.1rc1), [v0.7.3rc1](https://github.com/vllm-" +"project/vllm-ascend/releases/tag/v0.7.3rc1), " +"[v0.7.3rc2](https://github.com/vllm-project/vllm-" +"ascend/releases/tag/v0.7.3rc2)). And all the features are fully tested " +"and verified. Visit the official doc the get the detail " +"[feature](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/suppoted_features.html)" +" and " +"[model](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/supported_models.html)" +" support matrix." msgstr "" -"本次发布包含了所有在之前候选版本中加入的功能([v0.7.1rc1](https://github." -"com/vllm-project/vllm-ascend/releases/tag/v0.7.1rc1)、[v0.7.3rc1](https://" -"github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3rc1)、[v0.7.3rc2]" -"(https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3rc2))。所有" -"功能都经过了全面测试和验证。请访问官方文档获取详细的[功能](https://vllm-" -"ascend.readthedocs.io/en/v0.7.3-dev/user_guide/suppoted_features.html)和[模" -"型](https://vllm-ascend.readthedocs.io/en/v0.7.3-dev/user_guide/" -"supported_models.html)支持矩阵。" +"本次发布包含了之前所有候选版本中已实现的功能([v0.7.1rc1](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.1rc1)、[v0.7.3rc1](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3rc1)、[v0.7.3rc2](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3rc2))。所有功能均已通过全面测试和验证。请访问官方文档以获取详细的[功能](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/suppoted_features.html)和[模型](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/supported_models.html)支持矩阵。" -#: ../../user_guide/release_notes.md:142 +#: ../../source/user_guide/release_notes.md:1237 msgid "" "Upgrade CANN to 8.1.RC1 to enable chunked prefill and automatic prefix " "caching features. You can now enable them now." -msgstr "" -"将 CANN 升级到 8.1.RC1 以启用分块预填充和自动前缀缓存功能。您现在可以启用这" -"些功能了。" +msgstr "将 CANN 升级到 8.1.RC1 以启用分块预填充和自动前缀缓存功能。您现在可以启用这些功能。" -#: ../../user_guide/release_notes.md:143 +#: ../../source/user_guide/release_notes.md:1238 msgid "" -"Upgrade PyTorch to 2.5.1. vLLM Ascend no longer relies on the dev version " -"of torch-npu now. Now users don't need to install the torch-npu by hand. " -"The 2.5.1 version of torch-npu will be installed automatically. [#662]" -"(https://github.com/vllm-project/vllm-ascend/pull/662)" +"Upgrade PyTorch to 2.5.1. vLLM Ascend no longer relies on the dev version" +" of torch-npu now. Now users don't need to install the torch-npu by hand." +" The 2.5.1 version of torch-npu will be installed automatically. 
" +"[#662](https://github.com/vllm-project/vllm-ascend/pull/662)" msgstr "" -"升级 PyTorch 至 2.5.1。vLLM Ascend 现在不再依赖于 torch-npu 的开发版本。用户" -"现在无需手动安装 torch-npu,2.5.1 版本的 torch-npu 会被自动安装。[#662]" -"(https://github.com/vllm-project/vllm-ascend/pull/662)" +"将 PyTorch 升级至 2.5.1。vLLM Ascend 现在不再依赖 torch-npu 的开发版本。用户无需再手动安装 torch-npu,2.5.1 版本的 torch-npu 将被自动安装。[#662](https://github.com/vllm-project/vllm-ascend/pull/662)" -#: ../../user_guide/release_notes.md:144 +#: ../../source/user_guide/release_notes.md:1239 msgid "" -"Integrate MindIE Turbo into vLLM Ascend to improve DeepSeek V3/R1, Qwen 2 " -"series performance. [#708](https://github.com/vllm-project/vllm-ascend/" -"pull/708)" +"Integrate MindIE Turbo into vLLM Ascend to improve DeepSeek V3/R1, Qwen 2" +" series performance. [#708](https://github.com/vllm-project/vllm-" +"ascend/pull/708)" msgstr "" -"将 MindIE Turbo 集成到 vLLM Ascend 以提升 DeepSeek V3/R1、Qwen 2 系列的性" -"能。[#708](https://github.com/vllm-project/vllm-ascend/pull/708)" +"将 MindIE Turbo 集成到 vLLM Ascend 中,以提升 DeepSeek V3/R1、Qwen 2 系列的性能。[#708](https://github.com/vllm-project/vllm-ascend/pull/708)" -#: ../../user_guide/release_notes.md:147 +#: ../../source/user_guide/release_notes.md:1243 msgid "" -"LoRA、Multi-LoRA And Dynamic Serving is supported now. The performance will " -"be improved in the next release. Please follow the official doc for more " -"usage information. Thanks for the contribution from China Merchants Bank. " -"[#700](https://github.com/vllm-project/vllm-ascend/pull/700)" +"LoRA、Multi-LoRA And Dynamic Serving is supported now. The performance " +"will be improved in the next release. Please follow the official doc for " +"more usage information. Thanks for the contribution from China Merchants " +"Bank. [#700](https://github.com/vllm-project/vllm-ascend/pull/700)" msgstr "" -"现在已经支持 LoRA、多LoRA 和动态服务。下一个版本中性能将会提升。请参阅官方文" -"档以获取更多用法信息。感谢招商银行的贡献。[#700](https://github.com/vllm-" -"project/vllm-ascend/pull/700)" +"现已支持 LoRA、多 LoRA 和动态服务。性能将在下一个版本中得到提升。请参阅官方文档以获取更多使用信息。感谢招商银行的贡献。[#700](https://github.com/vllm-project/vllm-ascend/pull/700)" -#: ../../user_guide/release_notes.md:150 +#: ../../source/user_guide/release_notes.md:1247 msgid "" -"The performance of Qwen2 vl and Qwen2.5 vl is improved. [#702](https://" -"github.com/vllm-project/vllm-ascend/pull/702)" -msgstr "" -"Qwen2 vl 和 Qwen2.5 vl 的性能得到了提升。 [#702](https://github.com/vllm-" -"project/vllm-ascend/pull/702)" +"The performance of Qwen2 vl and Qwen2.5 vl is improved. " +"[#702](https://github.com/vllm-project/vllm-ascend/pull/702)" +msgstr "Qwen2 vl 和 Qwen2.5 vl 的性能已得到提升。[#702](https://github.com/vllm-project/vllm-ascend/pull/702)" -#: ../../user_guide/release_notes.md:151 +#: ../../source/user_guide/release_notes.md:1248 msgid "" -"The performance of `apply_penalties` and `topKtopP` ops are improved. [#525]" -"(https://github.com/vllm-project/vllm-ascend/pull/525)" -msgstr "" -"`apply_penalties` 和 `topKtopP` 操作的性能得到了提升。 [#525](https://" -"github.com/vllm-project/vllm-ascend/pull/525)" +"The performance of `apply_penalties` and `topKtopP` ops are improved. " +"[#525](https://github.com/vllm-project/vllm-ascend/pull/525)" +msgstr "`apply_penalties` 和 `topKtopP` 操作的性能已得到提升。[#525](https://github.com/vllm-project/vllm-ascend/pull/525)" -#: ../../user_guide/release_notes.md:154 +#: ../../source/user_guide/release_notes.md:1252 msgid "" -"Fixed a issue that may lead CPU memory leak. 
[#691](https://github.com/vllm-" -"project/vllm-ascend/pull/691) [#712](https://github.com/vllm-project/vllm-" -"ascend/pull/712)" -msgstr "" -"修复了可能导致CPU内存泄漏的问题。 [#691](https://github.com/vllm-project/" -"vllm-ascend/pull/691) [#712](https://github.com/vllm-project/vllm-ascend/" -"pull/712)" +"Fixed a issue that may lead CPU memory leak. [#691](https://github.com" +"/vllm-project/vllm-ascend/pull/691) [#712](https://github.com/vllm-" +"project/vllm-ascend/pull/712)" +msgstr "修复了一个可能导致 CPU 内存泄漏的问题。[#691](https://github.com/vllm-project/vllm-ascend/pull/691) [#712](https://github.com/vllm-project/vllm-ascend/pull/712)" -#: ../../user_guide/release_notes.md:155 +#: ../../source/user_guide/release_notes.md:1253 msgid "" "A new environment `SOC_VERSION` is added. If you hit any soc detection " -"error when building with custom ops enabled, please set `SOC_VERSION` to a " -"suitable value. [#606](https://github.com/vllm-project/vllm-ascend/pull/606)" -msgstr "" -"新增了一个环境变量 `SOC_VERSION`。如果在启用自定义算子时构建过程中遇到 soc " -"检测错误,请将 `SOC_VERSION` 设置为合适的值。[#606](https://github.com/vllm-" -"project/vllm-ascend/pull/606)" +"error when building with custom ops enabled, please set `SOC_VERSION` to " +"a suitable value. [#606](https://github.com/vllm-project/vllm-" +"ascend/pull/606)" +msgstr "新增了一个环境变量 `SOC_VERSION`。如果在启用自定义算子构建时遇到任何 SoC 检测错误,请将 `SOC_VERSION` 设置为合适的值。[#606](https://github.com/vllm-project/vllm-ascend/pull/606)" -#: ../../user_guide/release_notes.md:156 +#: ../../source/user_guide/release_notes.md:1254 msgid "" -"openEuler container image supported with v0.7.3-openeuler tag. [#665]" -"(https://github.com/vllm-project/vllm-ascend/pull/665)" -msgstr "" -"openEuler 容器镜像已支持 v0.7.3-openeuler 标签。[#665](https://github.com/" -"vllm-project/vllm-ascend/pull/665)" +"openEuler container image supported with v0.7.3-openeuler tag. " +"[#665](https://github.com/vllm-project/vllm-ascend/pull/665)" +msgstr "现已支持带有 v0.7.3-openeuler 标签的 openEuler 容器镜像。[#665](https://github.com/vllm-project/vllm-ascend/pull/665)" -#: ../../user_guide/release_notes.md:157 +#: ../../source/user_guide/release_notes.md:1255 msgid "" -"Prefix cache feature works on V1 engine now. [#559](https://github.com/vllm-" -"project/vllm-ascend/pull/559)" -msgstr "" -"前缀缓存功能现在已在 V1 引擎上工作。[#559](https://github.com/vllm-project/" -"vllm-ascend/pull/559)" +"Prefix cache feature works on V1 engine now. [#559](https://github.com" +"/vllm-project/vllm-ascend/pull/559)" +msgstr "前缀缓存功能现已在 V1 引擎上正常工作。[#559](https://github.com/vllm-project/vllm-ascend/pull/559)" -#: ../../user_guide/release_notes.md:159 +#: ../../source/user_guide/release_notes.md:1257 msgid "v0.8.5rc1 - 2025.05.06" msgstr "v0.8.5rc1 - 2025.05.06" -#: ../../user_guide/release_notes.md:161 +#: ../../source/user_guide/release_notes.md:1259 msgid "" -"This is the 1st release candidate of v0.8.5 for vllm-ascend. Please follow " -"the [official doc](https://vllm-ascend.readthedocs.io/en/) to start the " -"journey. Now you can enable V1 egnine by setting the environment variable " -"`VLLM_USE_V1=1`, see the feature support status of vLLM Ascend in [here]" -"(https://vllm-ascend.readthedocs.io/en/latest/user_guide/support_matrix/" -"supported_features.html)." +"This is the 1st release candidate of v0.8.5 for vllm-ascend. Please " +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.8.5rc1) to start the journey. 
Now you can enable V1 egnine" +" by setting the environment variable `VLLM_USE_V1=1`, see the feature " +"support status of vLLM Ascend in [supported_features](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/v0.8.5rc1/docs/source/user_guide/suppoted_features.md)." msgstr "" -"这是 vllm-ascend v0.8.5 的第一个候选发布版本。请按照[官方文档](https://vllm-" -"ascend.readthedocs.io/en/)开始使用。现在,您可以通过设置环境变量 " -"`VLLM_USE_V1=1` 启用 V1 引擎。关于 vLLM Ascend 的特性支持情况,请参见[这里]" -"(https://vllm-ascend.readthedocs.io/en/latest/user_guide/support_matrix/" -"supported_features.html)。" +"这是 vllm-ascend v0.8.5 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.8.5rc1)开始使用。现在,您可以通过设置环境变量 `VLLM_USE_V1=1` 来启用 V1 引擎。vLLM Ascend 的功能支持状态请参见 [supported_features](https://github.com/vllm-project/vllm-ascend/blob/v0.8.5rc1/docs/source/user_guide/suppoted_features.md)。" -#: ../../user_guide/release_notes.md:164 +#: ../../source/user_guide/release_notes.md:1263 msgid "" "Upgrade CANN version to 8.1.RC1 to support chunked prefill and automatic " -"prefix caching (`--enable_prefix_caching`) when V1 is enabled [#747]" -"(https://github.com/vllm-project/vllm-ascend/pull/747)" -msgstr "" -"将 CANN 版本升级到 8.1.RC1,以支持在启用 V1 时的分块预填充和自动前缀缓存" -"(`--enable_prefix_caching`)[#747](https://github.com/vllm-project/vllm-" -"ascend/pull/747)" +"prefix caching (`--enable_prefix_caching`) when V1 is enabled " +"[#747](https://github.com/vllm-project/vllm-ascend/pull/747)" +msgstr "将 CANN 版本升级到 8.1.RC1,以支持在启用 V1 时的分块预填充和自动前缀缓存(`--enable_prefix_caching`)功能[#747](https://github.com/vllm-project/vllm-ascend/pull/747)" -#: ../../user_guide/release_notes.md:165 +#: ../../source/user_guide/release_notes.md:1264 msgid "" -"Optimize Qwen2 VL and Qwen 2.5 VL [#701](https://github.com/vllm-project/" -"vllm-ascend/pull/701)" -msgstr "" -"优化 Qwen2 VL 和 Qwen 2.5 VL [#701](https://github.com/vllm-project/vllm-" -"ascend/pull/701)" +"Optimize Qwen2 VL and Qwen 2.5 VL [#701](https://github.com/vllm-project" +"/vllm-ascend/pull/701)" +msgstr "优化 Qwen2 VL 和 Qwen 2.5 VL [#701](https://github.com/vllm-project/vllm-ascend/pull/701)" -#: ../../user_guide/release_notes.md:166 -#, python-brace-format +#: ../../source/user_guide/release_notes.md:1265 msgid "" -"Improve Deepseek V3 eager mode and graph mode performance, now you can use " -"--additional_config={'enable_graph_mode': True} to enable graph mode. [#598]" -"(https://github.com/vllm-project/vllm-ascend/pull/598) [#719](https://" -"github.com/vllm-project/vllm-ascend/pull/719)" -msgstr "" -"改进了 Deepseek V3 的 eager 模式和图模式性能,现在你可以使用 --" -"additional_config={'enable_graph_mode': True} 来启用图模式。[#598](https://" -"github.com/vllm-project/vllm-ascend/pull/598) [#719](https://github.com/" -"vllm-project/vllm-ascend/pull/719)" +"Improve Deepseek V3 eager mode and graph mode performance, now you can " +"use --additional_config={'enable_graph_mode': True} to enable graph mode." 
+" [#598](https://github.com/vllm-project/vllm-ascend/pull/598) " +"[#719](https://github.com/vllm-project/vllm-ascend/pull/719)" +msgstr "改进了 Deepseek V3 的 eager 模式和图模式性能,现在您可以使用 --additional_config={'enable_graph_mode': True} 来启用图模式。[#598](https://github.com/vllm-project/vllm-ascend/pull/598) [#719](https://github.com/vllm-project/vllm-ascend/pull/719)" -#: ../../user_guide/release_notes.md:169 +#: ../../source/user_guide/release_notes.md:1269 msgid "" "Upgrade vLLM to 0.8.5.post1 [#715](https://github.com/vllm-project/vllm-" "ascend/pull/715)" -msgstr "" -"将 vLLM 升级到 0.8.5.post1 [#715](https://github.com/vllm-project/vllm-" -"ascend/pull/715)" +msgstr "将 vLLM 升级到 0.8.5.post1 [#715](https://github.com/vllm-project/vllm-ascend/pull/715)" -#: ../../user_guide/release_notes.md:170 +#: ../../source/user_guide/release_notes.md:1270 msgid "" -"Fix early return in CustomDeepseekV2MoE.forward during profile_run [#682]" -"(https://github.com/vllm-project/vllm-ascend/pull/682)" -msgstr "" -"修复在 profile_run 期间 CustomDeepseekV2MoE.forward 过早返回的问题 [#682]" -"(https://github.com/vllm-project/vllm-ascend/pull/682)" +"Fix early return in CustomDeepseekV2MoE.forward during profile_run " +"[#682](https://github.com/vllm-project/vllm-ascend/pull/682)" +msgstr "修复了在 profile_run 期间 CustomDeepseekV2MoE.forward 过早返回的问题 [#682](https://github.com/vllm-project/vllm-ascend/pull/682)" -#: ../../user_guide/release_notes.md:171 +#: ../../source/user_guide/release_notes.md:1271 msgid "" -"Adapts for new quant model generated by modelslim [#719](https://github.com/" -"vllm-project/vllm-ascend/pull/719)" -msgstr "" -"适配由 modelslim 生成的新量化模型 [#719](https://github.com/vllm-project/" -"vllm-ascend/pull/719)" +"Adapts for new quant model generated by modelslim " +"[#719](https://github.com/vllm-project/vllm-ascend/pull/719)" +msgstr "适配由 modelslim 生成的新量化模型 [#719](https://github.com/vllm-project/vllm-ascend/pull/719)" -#: ../../user_guide/release_notes.md:172 +#: ../../source/user_guide/release_notes.md:1272 msgid "" -"Initial support on P2P Disaggregated Prefill based on llm_datadist [#694]" -"(https://github.com/vllm-project/vllm-ascend/pull/694)" -msgstr "" -"基于 llm_datadist 的 P2P 分布式 Prefill 初步支持 [#694](https://github.com/" -"vllm-project/vllm-ascend/pull/694)" +"Initial support on P2P Disaggregated Prefill based on llm_datadist " +"[#694](https://github.com/vllm-project/vllm-ascend/pull/694)" +msgstr "基于 llm_datadist 的 P2P 分布式预填充初步支持 [#694](https://github.com/vllm-project/vllm-ascend/pull/694)" -#: ../../user_guide/release_notes.md:173 +#: ../../source/user_guide/release_notes.md:1273 msgid "" -"Use `/vllm-workspace` as code path and include `.git` in container image to " -"fix issue when start vllm under `/workspace` [#726](https://github.com/vllm-" -"project/vllm-ascend/pull/726)" +"Use `/vllm-workspace` as code path and include `.git` in container image " +"to fix issue when start vllm under `/workspace` [#726](https://github.com" +"/vllm-project/vllm-ascend/pull/726)" msgstr "" -"使用 `/vllm-workspace` 作为代码路径,并在容器镜像中包含 `.git` ,以修复在 `/" -"workspace` 下启动 vllm 时的问题 [#726](https://github.com/vllm-project/vllm-" -"ascend/pull/726)" +"使用 `/vllm-workspace` 作为代码路径,并在容器镜像中包含 `.git` ,以修复在 `/workspace` 下启动 vllm " +"时的问题 [#726](https://github.com/vllm-project/vllm-ascend/pull/726)" -#: ../../user_guide/release_notes.md:174 +#: ../../source/user_guide/release_notes.md:1274 msgid "" "Optimize NPU memory usage to make DeepSeek R1 W8A8 32K model len work. 
" "[#728](https://github.com/vllm-project/vllm-ascend/pull/728)" msgstr "" -"优化NPU内存使用,以使 DeepSeek R1 W8A8 32K 模型长度能够运行。[#728](https://" -"github.com/vllm-project/vllm-ascend/pull/728)" +"优化NPU内存使用,以使 DeepSeek R1 W8A8 32K 模型长度能够运行。[#728](https://github.com" +"/vllm-project/vllm-ascend/pull/728)" -#: ../../user_guide/release_notes.md:175 +#: ../../source/user_guide/release_notes.md:1275 msgid "" -"Fix `PYTHON_INCLUDE_PATH` typo in setup.py [#762](https://github.com/vllm-" +"Fix `PYTHON_INCLUDE_PATH` typo in setup.py [#762](https://github.com" +"/vllm-project/vllm-ascend/pull/762)" +msgstr "" +"修复 setup.py 中的 `PYTHON_INCLUDE_PATH` 拼写错误 [#762](https://github.com/vllm-" "project/vllm-ascend/pull/762)" -msgstr "" -"修复 setup.py 中的 `PYTHON_INCLUDE_PATH` 拼写错误 [#762](https://github.com/" -"vllm-project/vllm-ascend/pull/762)" -#: ../../user_guide/release_notes.md:178 +#: ../../source/user_guide/release_notes.md:1279 msgid "" -"Add Qwen3-0.6B test [#717](https://github.com/vllm-project/vllm-ascend/" -"pull/717)" +"Add Qwen3-0.6B test [#717](https://github.com/vllm-project/vllm-" +"ascend/pull/717)" msgstr "" -"添加 Qwen3-0.6B 测试 [#717](https://github.com/vllm-project/vllm-ascend/" -"pull/717)" +"添加 Qwen3-0.6B 测试 [#717](https://github.com/vllm-project/vllm-" +"ascend/pull/717)" -#: ../../user_guide/release_notes.md:179 +#: ../../source/user_guide/release_notes.md:1280 msgid "" -"Add nightly CI [#668](https://github.com/vllm-project/vllm-ascend/pull/668)" -msgstr "" -"添加每晚持续集成 [#668](https://github.com/vllm-project/vllm-ascend/" -"pull/668)" +"Add nightly CI [#668](https://github.com/vllm-project/vllm-" +"ascend/pull/668)" +msgstr "添加夜间持续集成 [#668](https://github.com/vllm-project/vllm-ascend/pull/668)" -#: ../../user_guide/release_notes.md:180 +#: ../../source/user_guide/release_notes.md:1281 msgid "" -"Add accuracy test report [#542](https://github.com/vllm-project/vllm-ascend/" -"pull/542)" -msgstr "" -"添加准确性测试报告 [#542](https://github.com/vllm-project/vllm-ascend/" -"pull/542)" +"Add accuracy test report [#542](https://github.com/vllm-project/vllm-" +"ascend/pull/542)" +msgstr "添加准确性测试报告 [#542](https://github.com/vllm-project/vllm-ascend/pull/542)" -#: ../../user_guide/release_notes.md:182 +#: ../../source/user_guide/release_notes.md:1283 msgid "v0.8.4rc2 - 2025.04.29" msgstr "v0.8.4rc2 - 2025.04.29" -#: ../../user_guide/release_notes.md:184 +#: ../../source/user_guide/release_notes.md:1285 msgid "" "This is the second release candidate of v0.8.4 for vllm-ascend. Please " -"follow the [official doc](https://vllm-ascend.readthedocs.io/en/) to start " -"the journey. Some experimental features are included in this version, such " -"as W8A8 quantization and EP/DP support. We'll make them stable enough in " -"the next release." +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.8.4rc2) to start the journey. Some experimental features " +"are included in this version, such as W8A8 quantization and EP/DP " +"support. We'll make them stable enough in the next release." msgstr "" -"这是 vllm-ascend 的 v0.8.4 第二个候选版本。请按照[官方文档](https://vllm-" -"ascend.readthedocs.io/en/)开始使用。本版本包含了一些实验性功能,如 W8A8 量化" -"和 EP/DP 支持。我们将在下一个版本中使这些功能更加稳定。" +"这是 vllm-ascend v0.8.4 的第二个候选版本。请遵循[官方文档](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.8.4rc2)开始使用。此版本包含一些实验性功能,例如 W8A8 量化和 EP/DP 支持。我们将在下一个版本中使其足够稳定。" -#: ../../user_guide/release_notes.md:187 +#: ../../source/user_guide/release_notes.md:1289 msgid "" -"Qwen3 and Qwen3MOE is supported now. 
Please follow the [official doc]" -"(https://vllm-ascend.readthedocs.io/en/latest/tutorials/single_npu.html) to " -"run the quick demo. [#709](https://github.com/vllm-project/vllm-ascend/" -"pull/709)" +"Qwen3 and Qwen3MOE is supported now. Please follow the [official " +"doc](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.8.4rc2/docs/source/tutorials/single_npu.md) to run the " +"quick demo. [#709](https://github.com/vllm-project/vllm-ascend/pull/709)" msgstr "" -"现在已支持 Qwen3 和 Qwen3MOE。请按照[官方文档](https://vllm-ascend." -"readthedocs.io/en/latest/tutorials/single_npu.html)运行快速演示。[#709]" -"(https://github.com/vllm-project/vllm-ascend/pull/709)" +"现已支持 Qwen3 和 Qwen3MOE。请遵循[官方文档](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.8.4rc2/docs/source/tutorials/single_npu.md)运行快速演示。[#709](https://github.com" +"/vllm-project/vllm-ascend/pull/709)" -#: ../../user_guide/release_notes.md:188 +#: ../../source/user_guide/release_notes.md:1290 msgid "" -"Ascend W8A8 quantization method is supported now. Please take the [official " -"doc](https://vllm-ascend.readthedocs.io/en/latest/tutorials/" -"multi_npu_quantization.html) for example. Any [feedback](https://github.com/" -"vllm-project/vllm-ascend/issues/619) is welcome. [#580](https://github.com/" -"vllm-project/vllm-ascend/pull/580)" +"Ascend W8A8 quantization method is supported now. Please take the " +"[official doc](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.8.4rc2/docs/source/tutorials/multi_npu_quantization.md) " +"for example. Any [feedback](https://github.com/vllm-project/vllm-" +"ascend/issues/619) is welcome. [#580](https://github.com/vllm-project" +"/vllm-ascend/pull/580)" msgstr "" -"现在支持 Ascend W8A8 量化方法。请参考[官方文档](https://vllm-ascend." -"readthedocs.io/en/latest/tutorials/multi_npu_quantization.html) 示例。欢迎提" -"供任何[反馈](https://github.com/vllm-project/vllm-ascend/issues/619)。[#580]" -"(https://github.com/vllm-project/vllm-ascend/pull/580)" +"现已支持 Ascend W8A8 量化方法。请参考[官方文档](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.8.4rc2/docs/source/tutorials/multi_npu_quantization.md) " +"示例。欢迎提供任何[反馈](https://github.com/vllm-project/vllm-ascend/issues/619)。[#580](https://github.com" +"/vllm-project/vllm-ascend/pull/580)" -#: ../../user_guide/release_notes.md:189 +#: ../../source/user_guide/release_notes.md:1291 msgid "" -"DeepSeek V3/R1 works with DP, TP and MTP now. Please note that it's still " -"in experimental status. Let us know if you hit any problem. [#429](https://" -"github.com/vllm-project/vllm-ascend/pull/429) [#585](https://github.com/" -"vllm-project/vllm-ascend/pull/585) [#626](https://github.com/vllm-project/" -"vllm-ascend/pull/626) [#636](https://github.com/vllm-project/vllm-ascend/" -"pull/636) [#671](https://github.com/vllm-project/vllm-ascend/pull/671)" +"DeepSeek V3/R1 works with DP, TP and MTP now. Please note that it's still" +" in experimental status. Let us know if you hit any problem. 
" +"[#429](https://github.com/vllm-project/vllm-ascend/pull/429) " +"[#585](https://github.com/vllm-project/vllm-ascend/pull/585) " +"[#626](https://github.com/vllm-project/vllm-ascend/pull/626) " +"[#636](https://github.com/vllm-project/vllm-ascend/pull/636) " +"[#671](https://github.com/vllm-project/vllm-ascend/pull/671)" msgstr "" -"DeepSeek V3/R1 现在已经支持 DP、TP 和 MTP。请注意,目前仍处于实验阶段。如果" -"遇到任何问题,请告知我们。 [#429](https://github.com/vllm-project/vllm-" -"ascend/pull/429) [#585](https://github.com/vllm-project/vllm-ascend/" -"pull/585) [#626](https://github.com/vllm-project/vllm-ascend/pull/626) " -"[#636](https://github.com/vllm-project/vllm-ascend/pull/636) [#671](https://" -"github.com/vllm-project/vllm-ascend/pull/671)" +"DeepSeek V3/R1 现已支持 DP、TP 和 MTP。请注意,目前仍处于实验阶段。如果遇到任何问题,请告知我们。 " +"[#429](https://github.com/vllm-project/vllm-ascend/pull/429) " +"[#585](https://github.com/vllm-project/vllm-ascend/pull/585) " +"[#626](https://github.com/vllm-project/vllm-ascend/pull/626) " +"[#636](https://github.com/vllm-project/vllm-ascend/pull/636) " +"[#671](https://github.com/vllm-project/vllm-ascend/pull/671)" -#: ../../user_guide/release_notes.md:192 +#: ../../source/user_guide/release_notes.md:1295 msgid "" -"ACLGraph feature is supported with V1 engine now. It's disabled by default " -"because this feature rely on CANN 8.1 release. We'll make it available by " -"default in the next release [#426](https://github.com/vllm-project/vllm-" -"ascend/pull/426)" +"ACLGraph feature is supported with V1 engine now. It's disabled by " +"default because this feature rely on CANN 8.1 release. We'll make it " +"available by default in the next release [#426](https://github.com/vllm-" +"project/vllm-ascend/pull/426)" msgstr "" -"ACLGraph 特性现在已被 V1 引擎支持。它默认是禁用的,因为该特性依赖于 CANN " -"8.1 版本。我们将在下一个版本中默认启用此特性 [#426](https://github.com/vllm-" -"project/vllm-ascend/pull/426)。" +"ACLGraph 特性现在已被 V1 引擎支持。它默认是禁用的,因为该特性依赖于 CANN 8.1 版本。我们将在下一个版本中默认启用此特性 " +"[#426](https://github.com/vllm-project/vllm-ascend/pull/426)。" -#: ../../user_guide/release_notes.md:193 +#: ../../source/user_guide/release_notes.md:1296 msgid "" -"Upgrade PyTorch to 2.5.1. vLLM Ascend no longer relies on the dev version " -"of torch-npu now. Now users don't need to install the torch-npu by hand. " -"The 2.5.1 version of torch-npu will be installed automatically. [#661]" -"(https://github.com/vllm-project/vllm-ascend/pull/661)" +"Upgrade PyTorch to 2.5.1. vLLM Ascend no longer relies on the dev version" +" of torch-npu now. Now users don't need to install the torch-npu by hand." +" The 2.5.1 version of torch-npu will be installed automatically. " +"[#661](https://github.com/vllm-project/vllm-ascend/pull/661)" msgstr "" -"升级 PyTorch 至 2.5.1。vLLM Ascend 现在不再依赖 dev 版本的 torch-npu,用户无" -"需手动安装 torch-npu。torch-npu 的 2.5.1 版本将会自动安装。[#661](https://" -"github.com/vllm-project/vllm-ascend/pull/661)" +"升级 PyTorch 至 2.5.1。vLLM Ascend 现在不再依赖开发版本的 torch-npu。用户无需手动安装 torch-npu。" +" torch-npu 的 2.5.1 版本将会自动安装。[#661](https://github.com/vllm-project/vllm-ascend/pull/661)" -#: ../../user_guide/release_notes.md:196 +#: ../../source/user_guide/release_notes.md:1300 msgid "" -"MiniCPM model works now. [#645](https://github.com/vllm-project/vllm-ascend/" -"pull/645)" -msgstr "" -"MiniCPM 模型现在可以使用了。[#645](https://github.com/vllm-project/vllm-" +"MiniCPM model works now. 
[#645](https://github.com/vllm-project/vllm-" "ascend/pull/645)" - -#: ../../user_guide/release_notes.md:197 -msgid "" -"openEuler container image supported with `v0.8.4-openeuler` tag and customs " -"Ops build is enabled by default for openEuler OS. [#689](https://github.com/" -"vllm-project/vllm-ascend/pull/689)" msgstr "" -"openEuler 容器镜像已支持 `v0.8.4-openeuler` 标签,并且 openEuler 操作系统默" -"认启用了自定义 Ops 构建。[#689](https://github.com/vllm-project/vllm-ascend/" -"pull/689)" +"MiniCPM 模型现在可以运行了。[#645](https://github.com/vllm-project/vllm-ascend/pull/645)" -#: ../../user_guide/release_notes.md:198 +#: ../../source/user_guide/release_notes.md:1301 msgid "" -"Fix ModuleNotFoundError bug to make Lora work [#600](https://github.com/" -"vllm-project/vllm-ascend/pull/600)" +"openEuler container image supported with `v0.8.4-openeuler` tag and " +"customs Ops build is enabled by default for openEuler OS. " +"[#689](https://github.com/vllm-project/vllm-ascend/pull/689)" msgstr "" -"修复 ModuleNotFoundError 错误以使 Lora 正常工作 [#600](https://github.com/" -"vllm-project/vllm-ascend/pull/600)" +"openEuler 容器镜像已支持 `v0.8.4-openeuler` 标签,并且 openEuler 操作系统默认启用了自定义算子构建。[#689](https://github.com" +"/vllm-project/vllm-ascend/pull/689)" -#: ../../user_guide/release_notes.md:199 +#: ../../source/user_guide/release_notes.md:1302 +msgid "" +"Fix ModuleNotFoundError bug to make Lora work [#600](https://github.com" +"/vllm-project/vllm-ascend/pull/600)" +msgstr "" +"修复 ModuleNotFoundError 错误以使 LoRA 正常工作 [#600](https://github.com/vllm-project/vllm-" +"ascend/pull/600)" + +#: ../../source/user_guide/release_notes.md:1303 msgid "" "Add \"Using EvalScope evaluation\" doc [#611](https://github.com/vllm-" "project/vllm-ascend/pull/611)" msgstr "" -"添加了“使用 EvalScope 评估”文档 [#611](https://github.com/vllm-project/vllm-" -"ascend/pull/611)" +"添加了“使用 EvalScope 评估”文档 [#611](https://github.com/vllm-project/vllm-ascend/pull/611)" -#: ../../user_guide/release_notes.md:200 +#: ../../source/user_guide/release_notes.md:1304 msgid "" -"Add a `VLLM_VERSION` environment to make vLLM version configurable to help " -"developer set correct vLLM version if the code of vLLM is changed by hand " -"locally. [#651](https://github.com/vllm-project/vllm-ascend/pull/651)" +"Add a `VLLM_VERSION` environment to make vLLM version configurable to " +"help developer set correct vLLM version if the code of vLLM is changed by" +" hand locally. [#651](https://github.com/vllm-project/vllm-" +"ascend/pull/651)" msgstr "" -"新增了一个 `VLLM_VERSION` 环境变量,使 vLLM 版本可以配置,帮助开发者在本地手" -"动修改 vLLM 代码后,设置正确的 vLLM 版本。[#651](https://github.com/vllm-" -"project/vllm-ascend/pull/651)" +"新增了一个 `VLLM_VERSION` 环境变量,使 vLLM 版本可配置,以帮助开发者在本地手动修改 vLLM 代码后设置正确的 vLLM " +"版本。[#651](https://github.com/vllm-project/vllm-ascend/pull/651)" -#: ../../user_guide/release_notes.md:202 +#: ../../source/user_guide/release_notes.md:1306 msgid "v0.8.4rc1 - 2025.04.18" msgstr "v0.8.4rc1 - 2025.04.18" -#: ../../user_guide/release_notes.md:204 +#: ../../source/user_guide/release_notes.md:1308 msgid "" "This is the first release candidate of v0.8.4 for vllm-ascend. Please " -"follow the [official doc](https://vllm-ascend.readthedocs.io/en/) to start " -"the journey. From this version, vllm-ascend will follow the newest version " -"of vllm and release every two weeks. For example, if vllm releases v0.8.5 " -"in the next two weeks, vllm-ascend will release v0.8.5rc1 instead of " -"v0.8.4rc2. 
Please find the detail from the [official documentation](https://" -"vllm-ascend.readthedocs.io/en/latest/community/versioning_policy." -"html#release-window)." +"follow the [official doc](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.8.4rc1) to start the journey. From this version, vllm-" +"ascend will follow the newest version of vllm and release every two " +"weeks. For example, if vllm releases v0.8.5 in the next two weeks, vllm-" +"ascend will release v0.8.5rc1 instead of v0.8.4rc2. Please find the " +"detail from the [official " +"documentation](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html" +"#release-window)." msgstr "" -"这是 vllm-ascend v0.8.4 的第一个候选发布版本。请按照[官方文档](https://vllm-" -"ascend.readthedocs.io/en/)开始使用。本版本起,vllm-ascend 将跟随 vllm 的最新" -"版本并每两周发布一次。例如,如果 vllm 在接下来的两周内发布 v0.8.5,vllm-" -"ascend 将发布 v0.8.5rc1,而不是 v0.8.4rc2。详细信息请参考[官方文档](https://" -"vllm-ascend.readthedocs.io/en/latest/community/versioning_policy." -"html#release-window)。" +"这是 vllm-ascend v0.8.4 的第一个候选版本。请遵循[官方文档](https://github.com/vllm-project/vllm-" +"ascend/tree/v0.8.4rc1)开始使用。从本版本起,vllm-ascend 将跟随 vllm 的最新版本并每两周发布一次。例如,如果 " +"vllm 在接下来的两周内发布 v0.8.5,vllm-ascend 将发布 v0.8.5rc1,而不是 v0.8.4rc2。详细信息请参阅[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html#release-window)。" -#: ../../user_guide/release_notes.md:208 +#: ../../source/user_guide/release_notes.md:1312 msgid "" "vLLM V1 engine experimental support is included in this version. You can " -"visit [official guide](https://docs.vllm.ai/en/latest/getting_started/" -"v1_user_guide.html) to get more detail. By default, vLLM will fallback to " -"V0 if V1 doesn't work, please set `VLLM_USE_V1=1` environment if you want " -"to use V1 forcibly." +"visit [official " +"guide](https://docs.vllm.ai/en/v0.8.4/getting_started/v1_user_guide.html)" +" to get more detail. By default, vLLM will fallback to V0 if V1 doesn't " +"work, please set `VLLM_USE_V1=1` environment if you want to use V1 " +"forcibly." msgstr "" -"本版本包含了对 vLLM V1 引擎的实验性支持。你可以访问[官方指南](https://docs." -"vllm.ai/en/latest/getting_started/v1_user_guide.html)获取更多详细信息。默认" -"情况下,如果 V1 不可用,vLLM 会自动回退到 V0。如果你想强制使用 V1,请设置 " -"`VLLM_USE_V1=1` 环境变量。" +"本版本包含了对 vLLM V1 引擎的实验性支持。你可以访问[官方指南](https://docs.vllm.ai/en/v0.8.4/getting_started/v1_user_guide.html)获取更多详细信息。默认情况下,如果" +" V1 不可用,vLLM 会自动回退到 V0。如果你想强制使用 V1,请设置 `VLLM_USE_V1=1` 环境变量。" -#: ../../user_guide/release_notes.md:209 +#: ../../source/user_guide/release_notes.md:1313 msgid "" -"LoRA、Multi-LoRA And Dynamic Serving is supported now. The performance will " -"be improved in the next release. Please follow the [official doc](https://" -"docs.vllm.ai/en/latest/features/lora.html) for more usage information. " -"Thanks for the contribution from China Merchants Bank. [#521](https://" -"github.com/vllm-project/vllm-ascend/pull/521)." +"LoRA、Multi-LoRA And Dynamic Serving is supported now. The performance " +"will be improved in the next release. Please follow the [official " +"doc](https://docs.vllm.ai/en/v0.8.4/features/lora.html) for more usage " +"information. Thanks for the contribution from China Merchants Bank. " +"[#521](https://github.com/vllm-project/vllm-ascend/pull/521)." 
msgstr "" -"现在已支持 LoRA、Multi-LoRA 和动态服务。性能将在下一个版本中得到提升。请参阅" -"[官方文档](https://docs.vllm.ai/en/latest/features/lora.html)获取更多使用信" -"息。感谢招商银行的贡献。[#521](https://github.com/vllm-project/vllm-ascend/" -"pull/521)。" +"现已支持 LoRA、Multi-LoRA 和动态服务。性能将在下一个版本中得到提升。请遵循[官方文档](https://docs.vllm.ai/en/v0.8.4/features/lora.html)获取更多使用信息。感谢招商银行的贡献。[#521](https://github.com" +"/vllm-project/vllm-ascend/pull/521)。" -#: ../../user_guide/release_notes.md:210 +#: ../../source/user_guide/release_notes.md:1314 msgid "" -"Sleep Mode feature is supported. Currently it's only work on V0 engine. V1 " -"engine support will come soon. [#513](https://github.com/vllm-project/vllm-" -"ascend/pull/513)" +"Sleep Mode feature is supported. Currently it only works on V0 engine. V1" +" engine support will come soon. [#513](https://github.com/vllm-project" +"/vllm-ascend/pull/513)" msgstr "" -"已支持休眠模式功能。目前它只在V0引擎上有效,V1引擎的支持即将到来。[#513]" -"(https://github.com/vllm-project/vllm-ascend/pull/513)" +"已支持休眠模式功能。目前它只在 V0 引擎上工作,V1 引擎的支持即将到来。[#513](https://github.com/vllm-project" +"/vllm-ascend/pull/513)" -#: ../../user_guide/release_notes.md:214 +#: ../../source/user_guide/release_notes.md:1318 msgid "" "The Ascend scheduler is added for V1 engine. This scheduler is more " -"affinity with Ascend hardware. More scheduler policy will be added in the " -"future. [#543](https://github.com/vllm-project/vllm-ascend/pull/543)" +"affinity with Ascend hardware. More scheduler policy will be added in the" +" future. [#543](https://github.com/vllm-project/vllm-ascend/pull/543)" msgstr "" -"为V1引擎新增了Ascend调度器。该调度器与Ascend硬件更加适配。未来还将添加更多调" -"度策略。 [#543](https://github.com/vllm-project/vllm-ascend/pull/543)" +"为 V1 引擎新增了 Ascend 调度器。该调度器与 Ascend 硬件更加适配。未来还将添加更多调度策略。 " +"[#543](https://github.com/vllm-project/vllm-ascend/pull/543)" -#: ../../user_guide/release_notes.md:215 +#: ../../source/user_guide/release_notes.md:1319 msgid "" -"Disaggregated Prefill feature is supported. Currently only 1P1D works. NPND " -"is under design by vllm team. vllm-ascend will support it once it's ready " -"from vLLM. Follow the [official guide](https://docs.vllm.ai/en/latest/" -"features/disagg_prefill.html) to use. [#432](https://github.com/vllm-" -"project/vllm-ascend/pull/432)" +"Disaggregated Prefill feature is supported. Currently only 1P1D works. " +"NPND is under design by vllm team. vllm-ascend will support it once it's " +"ready from vLLM. Follow the [official " +"guide](https://docs.vllm.ai/en/v0.8.4/features/disagg_prefill.html) to " +"use. [#432](https://github.com/vllm-project/vllm-ascend/pull/432)" msgstr "" -"支持分离式预填充(Disaggregated Prefill)功能。目前仅支持1P1D,NPND正在由" -"vllm团队设计中。一旦vLLM支持,vllm-ascend将会支持。请按照[官方指南](https://" -"docs.vllm.ai/en/latest/features/disagg_prefill.html)使用。[#432](https://" -"github.com/vllm-project/vllm-ascend/pull/432)" +"支持分离式预填充功能。目前仅支持 1P1D,NPND 正在由 vllm 团队设计中。一旦 vLLM 准备就绪,vllm-ascend " +"将会支持。请遵循[官方指南](https://docs.vllm.ai/en/v0.8.4/features/disagg_prefill.html)使用。[#432](https://github.com" +"/vllm-project/vllm-ascend/pull/432)" -#: ../../user_guide/release_notes.md:216 +#: ../../source/user_guide/release_notes.md:1320 msgid "" -"Spec decode feature works now. Currently it's only work on V0 engine. V1 " -"engine support will come soon. [#500](https://github.com/vllm-project/vllm-" -"ascend/pull/500)" +"Spec decode feature works now. Currently it only works on V0 engine. V1 " +"engine support will come soon. 
[#500](https://github.com/vllm-project" +"/vllm-ascend/pull/500)" msgstr "" -"Spec 解码功能现在可以使用。目前它只在 V0 引擎上工作,对 V1 引擎的支持即将到" -"来。[#500](https://github.com/vllm-project/vllm-ascend/pull/500)" +"推测解码功能现已可用。目前它只在 V0 引擎上工作,V1 引擎的支持即将到来。[#500](https://github.com/vllm-project" +"/vllm-ascend/pull/500)" -#: ../../user_guide/release_notes.md:217 +#: ../../source/user_guide/release_notes.md:1321 msgid "" "Structured output feature works now on V1 Engine. Currently it only " -"supports xgrammar backend while using guidance backend may get some errors. " -"[#555](https://github.com/vllm-project/vllm-ascend/pull/555)" +"supports xgrammar backend while using guidance backend may get some " +"errors. [#555](https://github.com/vllm-project/vllm-ascend/pull/555)" msgstr "" -"结构化输出功能现在已在V1引擎上生效。目前仅支持xgrammar后端,使用guidance后端" -"可能会出现一些错误。[#555](https://github.com/vllm-project/vllm-ascend/" -"pull/555)" +"结构化输出功能现在已在 V1 引擎上生效。目前仅支持 xgrammar 后端,使用 guidance 后端可能会出现一些错误。[#555](https://github.com" +"/vllm-project/vllm-ascend/pull/555)" -#: ../../user_guide/release_notes.md:221 +#: ../../source/user_guide/release_notes.md:1325 msgid "" -"A new communicator `pyhccl` is added. It's used for call CANN HCCL library " -"directly instead of using `torch.distribute`. More usage of it will be " -"added in the next release [#503](https://github.com/vllm-project/vllm-" -"ascend/pull/503)" +"A new communicator `pyhccl` is added. It's used for call CANN HCCL " +"library directly instead of using `torch.distribute`. More usage of it " +"will be added in the next release [#503](https://github.com/vllm-project" +"/vllm-ascend/pull/503)" msgstr "" -"新增了一个通信器 `pyhccl`。它用于直接调用 CANN HCCL 库,而不是使用 `torch." -"distribute`。将在下一个版本中添加更多用法 [#503](https://github.com/vllm-" -"project/vllm-ascend/pull/503)。" +"新增了一个通信器 `pyhccl`。它用于直接调用 CANN HCCL 库,而不是使用 `torch.distribute`。将在下一个版本中添加更多用法 " +"[#503](https://github.com/vllm-project/vllm-ascend/pull/503)。" -#: ../../user_guide/release_notes.md:222 +#: ../../source/user_guide/release_notes.md:1326 msgid "" -"The custom ops build is enabled by default. You should install the packages " -"like `gcc`, `cmake` first to build `vllm-ascend` from source. Set " -"`COMPILE_CUSTOM_KERNELS=0` environment to disable the compilation if you " -"don't need it. [#466](https://github.com/vllm-project/vllm-ascend/pull/466)" +"The custom ops build is enabled by default. You should install the " +"packages like `gcc`, `cmake` first to build `vllm-ascend` from source. " +"Set `COMPILE_CUSTOM_KERNELS=0` environment to disable the compilation if " +"you don't need it. [#466](https://github.com/vllm-project/vllm-" +"ascend/pull/466)" msgstr "" -"自定义算子的构建默认是启用的。你应该先安装如 `gcc`、`cmake` 等包以便从源码编" -"译 `vllm-ascend`。如果不需要自定义算子的编译,可以设置环境变量 " -"`COMPILE_CUSTOM_KERNELS=0` 来禁用编译。 [#466](https://github.com/vllm-" -"project/vllm-ascend/pull/466)" +"自定义算子的构建默认是启用的。你应该先安装如 `gcc`、`cmake` 等包以便从源码编译 `vllm-ascend`。如果不需要自定义算子的编译,可以设置环境变量 " +"`COMPILE_CUSTOM_KERNELS=0` 来禁用编译。 [#466](https://github.com/vllm-project/vllm-" +"ascend/pull/466)" -#: ../../user_guide/release_notes.md:223 +#: ../../source/user_guide/release_notes.md:1327 msgid "" -"The custom op `rotay embedding` is enabled by default now to improve the " -"performance. [#555](https://github.com/vllm-project/vllm-ascend/pull/555)" +"The custom op `rotary embedding` is enabled by default now to improve the" +" performance. 
[#555](https://github.com/vllm-project/vllm-" +"ascend/pull/555)" msgstr "" -"自定义算子 `rotay embedding` 现在已默认启用,以提升性能。[#555](https://" -"github.com/vllm-project/vllm-ascend/pull/555)" +"自定义算子 `rotary embedding` 现已默认启用,以提升性能。[#555](https://github.com/vllm-project/vllm-" +"ascend/pull/555)" -#: ../../user_guide/release_notes.md:225 +#: ../../source/user_guide/release_notes.md:1329 msgid "v0.7.3rc2 - 2025.03.29" msgstr "v0.7.3rc2 - 2025.03.29" -#: ../../user_guide/release_notes.md:227 +#: ../../source/user_guide/release_notes.md:1331 msgid "" -"This is 2nd release candidate of v0.7.3 for vllm-ascend. Please follow the " -"[official doc](https://vllm-ascend.readthedocs.io/en/v0.7.3-dev) to start " -"the journey." +"This is 2nd release candidate of v0.7.3 for vllm-ascend. Please follow " +"the [official doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3) to " +"start the journey." msgstr "" -"这是 vllm-ascend v0.7.3 的第二个候选发布版本。请根据[官方文档](https://vllm-" -"ascend.readthedocs.io/en/v0.7.3-dev)开始使用。" +"这是 vllm-ascend v0.7.3 的第二个候选版本。请遵循[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。" -#: ../../user_guide/release_notes.md:228 ../../user_guide/release_notes.md:250 +#: ../../source/user_guide/release_notes.md:1333 +#: ../../source/user_guide/release_notes.md:1359 msgid "" -"Quickstart with container: https://vllm-ascend.readthedocs.io/en/v0.7.3-dev/" -"quick_start.html" -msgstr "" -"容器快速入门: https://vllm-ascend.readthedocs.io/en/v0.7.3-dev/quick_start." -"html" +"Quickstart with container: " +"" +msgstr "容器快速入门:" -#: ../../user_guide/release_notes.md:229 ../../user_guide/release_notes.md:251 +#: ../../source/user_guide/release_notes.md:1334 +#: ../../source/user_guide/release_notes.md:1360 msgid "" -"Installation: https://vllm-ascend.readthedocs.io/en/v0.7.3-dev/installation." -"html" -msgstr "" -"安装: https://vllm-ascend.readthedocs.io/en/v0.7.3-dev/installation.html" +"Installation: " +"" +msgstr "安装: " -#: ../../user_guide/release_notes.md:232 +#: ../../source/user_guide/release_notes.md:1338 msgid "" -"Add Ascend Custom Ops framework. Developers now can write customs ops using " -"AscendC. An example ops `rotary_embedding` is added. More tutorials will " -"come soon. The Custom Ops compilation is disabled by default when " -"installing vllm-ascend. Set `COMPILE_CUSTOM_KERNELS=1` to enable it. [#371]" -"(https://github.com/vllm-project/vllm-ascend/pull/371)" +"Add Ascend Custom Ops framework. Developers now can write customs ops " +"using AscendC. An example ops `rotary_embedding` is added. More tutorials" +" will come soon. The Custom Ops compilation is disabled by default when " +"installing vllm-ascend. Set `COMPILE_CUSTOM_KERNELS=1` to enable it. " +"[#371](https://github.com/vllm-project/vllm-ascend/pull/371)" msgstr "" -"新增了Ascend自定义算子框架。开发者现在可以使用AscendC编写自定义算子。新增了" -"一个示例算子 `rotary_embedding` 。更多教程即将发布。安装vllm-ascend时,自定" -"义算子的编译默认是关闭的。可通过设置 `COMPILE_CUSTOM_KERNELS=1` 启用。[#371]" -"(https://github.com/vllm-project/vllm-ascend/pull/371)" +"新增 Ascend 自定义算子框架。开发者现在可以使用 AscendC 编写自定义算子。已添加示例算子 `rotary_embedding`。更多教程即将发布。安装 vllm-ascend 时,自定义算子编译默认禁用。设置 `COMPILE_CUSTOM_KERNELS=1` 以启用此功能。 [#371](https://github.com/vllm-project/vllm-ascend/pull/371)" -#: ../../user_guide/release_notes.md:233 +#: ../../source/user_guide/release_notes.md:1339 msgid "" -"V1 engine is basic supported in this release. The full support will be done " -"in 0.8.X release. If you hit any issue or have any requirement of V1 " -"engine. Please tell us [here](https://github.com/vllm-project/vllm-ascend/" -"issues/414). 
[#376](https://github.com/vllm-project/vllm-ascend/pull/376)" +"V1 engine is basic supported in this release. The full support will be " +"done in 0.8.X release. If you hit any issue or have any requirement of V1" +" engine. Please tell us [this issue](https://github.com/vllm-project" +"/vllm-ascend/issues/414). [#376](https://github.com/vllm-project/vllm-" +"ascend/pull/376)" msgstr "" -"本版本对 V1 引擎提供了基础支持,全面支持将在 0.8.X 版本中完成。如果您遇到任" -"何问题或有 V1 引擎的相关需求,请在[这里](https://github.com/vllm-project/" -"vllm-ascend/issues/414)告诉我们。[#376](https://github.com/vllm-project/" -"vllm-ascend/pull/376)" +"本版本已基本支持 V1 引擎。完整支持将在 0.8.X 版本中实现。如果您遇到任何问题或有 V1 引擎的相关需求,请通过[此问题](https://github.com/vllm-project/vllm-ascend/issues/414)告知我们。 [#376](https://github.com/vllm-project/vllm-ascend/pull/376)" -#: ../../user_guide/release_notes.md:234 +#: ../../source/user_guide/release_notes.md:1340 msgid "" -"Prefix cache feature works now. You can set `enable_prefix_caching=True` to " -"enable it. [#282](https://github.com/vllm-project/vllm-ascend/pull/282)" +"Prefix cache feature works now. You can set `enable_prefix_caching=True` " +"to enable it. [#282](https://github.com/vllm-project/vllm-" +"ascend/pull/282)" msgstr "" -"前缀缓存功能现在已经可用。你可以通过设置 `enable_prefix_caching=True` 来启用" -"该功能。[#282](https://github.com/vllm-project/vllm-ascend/pull/282)" +"前缀缓存功能现已可用。您可以通过设置 `enable_prefix_caching=True` 来启用它。 [#282](https://github.com/vllm-project/vllm-ascend/pull/282)" -#: ../../user_guide/release_notes.md:237 +#: ../../source/user_guide/release_notes.md:1344 msgid "" "Bump torch_npu version to dev20250320.3 to improve accuracy to fix `!!!` " -"output problem. [#406](https://github.com/vllm-project/vllm-ascend/pull/406)" +"output problem. [#406](https://github.com/vllm-project/vllm-" +"ascend/pull/406)" msgstr "" -"将 torch_npu 版本升级到 dev20250320.3 以提升精度,修复 `!!!` 输出问题。" -"[#406](https://github.com/vllm-project/vllm-ascend/pull/406)" +"将 torch_npu 版本升级至 dev20250320.3 以提高精度,修复 `!!!` 输出问题。 [#406](https://github.com/vllm-project/vllm-ascend/pull/406)" -#: ../../user_guide/release_notes.md:240 +#: ../../source/user_guide/release_notes.md:1348 msgid "" "The performance of Qwen2-vl is improved by optimizing patch embedding " "(Conv3D). [#398](https://github.com/vllm-project/vllm-ascend/pull/398)" msgstr "" -"通过优化 patch embedding(Conv3D),Qwen2-vl 的性能得到了提升。[#398]" -"(https://github.com/vllm-project/vllm-ascend/pull/398)" +"通过优化补丁嵌入(Conv3D),Qwen2-vl 的性能得到了提升。 [#398](https://github.com/vllm-project/vllm-ascend/pull/398)" -#: ../../user_guide/release_notes.md:244 +#: ../../source/user_guide/release_notes.md:1352 msgid "" -"Fixed a bug to make sure multi step scheduler feature work. [#349](https://" -"github.com/vllm-project/vllm-ascend/pull/349)" +"Fixed a bug to make sure multi step scheduler feature work. " +"[#349](https://github.com/vllm-project/vllm-ascend/pull/349)" msgstr "" -"修复了一个错误,以确保多步调度器功能正常工作。[#349](https://github.com/" -"vllm-project/vllm-ascend/pull/349)" +"修复了一个错误,确保多步调度器功能正常工作。 [#349](https://github.com/vllm-project/vllm-ascend/pull/349)" -#: ../../user_guide/release_notes.md:245 +#: ../../source/user_guide/release_notes.md:1353 msgid "" -"Fixed a bug to make prefix cache feature works with correct accuracy. [#424]" -"(https://github.com/vllm-project/vllm-ascend/pull/424)" +"Fixed a bug to make prefix cache feature works with correct accuracy. " +"[#424](https://github.com/vllm-project/vllm-ascend/pull/424)" msgstr "" -"修复了一个 bug,使前缀缓存功能能够以正确的准确性运行。[#424](https://github." 
-"com/vllm-project/vllm-ascend/pull/424)" +"修复了一个错误,使前缀缓存功能能够以正确的精度运行。 [#424](https://github.com/vllm-project/vllm-ascend/pull/424)" -#: ../../user_guide/release_notes.md:247 +#: ../../source/user_guide/release_notes.md:1355 msgid "v0.7.3rc1 - 2025.03.14" msgstr "v0.7.3rc1 - 2025.03.14" -#: ../../user_guide/release_notes.md:249 +#: ../../source/user_guide/release_notes.md:1357 msgid "" "🎉 Hello, World! This is the first release candidate of v0.7.3 for vllm-" -"ascend. Please follow the [official doc](https://vllm-ascend.readthedocs.io/" -"en/v0.7.3-dev) to start the journey." +"ascend. Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3) to start the " +"journey." msgstr "" -"🎉 你好,世界!这是 vllm-ascend v0.7.3 的第一个候选发布版本。请按照[官方文" -"档](https://vllm-ascend.readthedocs.io/en/v0.7.3-dev)开始你的旅程。" +"🎉 你好,世界!这是 vllm-ascend v0.7.3 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始您的旅程。" -#: ../../user_guide/release_notes.md:254 +#: ../../source/user_guide/release_notes.md:1364 msgid "" -"DeepSeek V3/R1 works well now. Read the [official guide](https://vllm-" -"ascend.readthedocs.io/en/v0.7.3-dev/tutorials/multi_node.html) to start! " -"[#242](https://github.com/vllm-project/vllm-ascend/pull/242)" +"DeepSeek V3/R1 works well now. Read the [official " +"guide](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/multi_node.html)" +" to start! [#242](https://github.com/vllm-project/vllm-ascend/pull/242)" msgstr "" -"DeepSeek V3/R1 现在运行良好。请阅读[官方指南](https://vllm-ascend." -"readthedocs.io/en/v0.7.3-dev/tutorials/multi_node.html)开始![#242](https://" -"github.com/vllm-project/vllm-ascend/pull/242)" +"DeepSeek V3/R1 现在运行良好。请阅读[官方指南](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/multi_node.html)开始使用! [#242](https://github.com/vllm-project/vllm-ascend/pull/242)" -#: ../../user_guide/release_notes.md:255 +#: ../../source/user_guide/release_notes.md:1365 msgid "" -"Speculative decoding feature is supported. [#252](https://github.com/vllm-" -"project/vllm-ascend/pull/252)" +"Speculative decoding feature is supported. [#252](https://github.com" +"/vllm-project/vllm-ascend/pull/252)" +msgstr "已支持推测解码功能。 [#252](https://github.com/vllm-project/vllm-ascend/pull/252)" + +#: ../../source/user_guide/release_notes.md:1366 +msgid "" +"Multi step scheduler feature is supported. [#300](https://github.com" +"/vllm-project/vllm-ascend/pull/300)" +msgstr "已支持多步调度器功能。 [#300](https://github.com/vllm-project/vllm-ascend/pull/300)" + +#: ../../source/user_guide/release_notes.md:1370 +msgid "Bump torch_npu version to dev20250308.3 to improve `_exponential` accuracy" +msgstr "将 torch_npu 版本升级至 dev20250308.3,以提高 `_exponential` 的精度" + +#: ../../source/user_guide/release_notes.md:1371 +msgid "" +"Added initial support for pooling models. Bert based model, such as `BAAI" +"/bge-base-en-v1.5` and `BAAI/bge-reranker-v2-m3` works now. " +"[#229](https://github.com/vllm-project/vllm-ascend/pull/229)" msgstr "" -"已支持猜测性解码功能。[#252](https://github.com/vllm-project/vllm-ascend/" -"pull/252)" +"新增了对池化模型的初步支持。基于 Bert 的模型,例如 `BAAI/bge-base-en-v1.5` 和 `BAAI/bge-reranker-v2-m3` 现已可用。 [#229](https://github.com/vllm-project/vllm-ascend/pull/229)" -#: ../../user_guide/release_notes.md:256 -msgid "" -"Multi step scheduler feature is supported. 
[#300](https://github.com/vllm-" -"project/vllm-ascend/pull/300)" -msgstr "" -"已支持多步调度器功能。[#300](https://github.com/vllm-project/vllm-ascend/" -"pull/300)" - -#: ../../user_guide/release_notes.md:259 -msgid "" -"Bump torch_npu version to dev20250308.3 to improve `_exponential` accuracy" -msgstr "将 torch_npu 版本升级到 dev20250308.3,以提升 `_exponential` 的精度" - -#: ../../user_guide/release_notes.md:260 -msgid "" -"Added initial support for pooling models. Bert based model, such as `BAAI/" -"bge-base-en-v1.5` and `BAAI/bge-reranker-v2-m3` works now. [#229](https://" -"github.com/vllm-project/vllm-ascend/pull/229)" -msgstr "" -"新增了对池化模型的初步支持。现在支持 Bert 基础模型,如 `BAAI/bge-base-en-" -"v1.5` 和 `BAAI/bge-reranker-v2-m3`。 [#229](https://github.com/vllm-project/" -"vllm-ascend/pull/229)" - -#: ../../user_guide/release_notes.md:263 +#: ../../source/user_guide/release_notes.md:1375 msgid "" "The performance of Qwen2-VL is improved. [#241](https://github.com/vllm-" "project/vllm-ascend/pull/241)" -msgstr "" -"Qwen2-VL 的性能得到了提升。[#241](https://github.com/vllm-project/vllm-" -"ascend/pull/241)" +msgstr "Qwen2-VL 的性能得到了提升。 [#241](https://github.com/vllm-project/vllm-ascend/pull/241)" -#: ../../user_guide/release_notes.md:264 +#: ../../source/user_guide/release_notes.md:1376 msgid "" -"MiniCPM is now supported [#164](https://github.com/vllm-project/vllm-ascend/" -"pull/164)" -msgstr "" -"MiniCPM 现在已被支持 [#164](https://github.com/vllm-project/vllm-ascend/" -"pull/164)" +"MiniCPM is now supported [#164](https://github.com/vllm-project/vllm-" +"ascend/pull/164)" +msgstr "现已支持 MiniCPM [#164](https://github.com/vllm-project/vllm-ascend/pull/164)" -#: ../../user_guide/release_notes.md:267 +#: ../../source/user_guide/release_notes.md:1380 msgid "" -"Support MTP(Multi-Token Prediction) for DeepSeek V3/R1 [#236](https://" -"github.com/vllm-project/vllm-ascend/pull/236)" -msgstr "" -"为 DeepSeek V3/R1 支持 MTP(多标记预测) [#236](https://github.com/vllm-" -"project/vllm-ascend/pull/236)" +"Support MTP(Multi-Token Prediction) for DeepSeek V3/R1 " +"[#236](https://github.com/vllm-project/vllm-ascend/pull/236)" +msgstr "为 DeepSeek V3/R1 支持 MTP(多标记预测) [#236](https://github.com/vllm-project/vllm-ascend/pull/236)" -#: ../../user_guide/release_notes.md:268 +#: ../../source/user_guide/release_notes.md:1381 msgid "" "[Docs] Added more model tutorials, include DeepSeek, QwQ, Qwen and Qwen " -"2.5VL. See the [official doc](https://vllm-ascend.readthedocs.io/en/v0.7.3-" -"dev/tutorials/index.html) for detail" +"2.5VL. See the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/index.html)" +" for detail" msgstr "" -"[文档] 增加了更多的模型教程,包括 DeepSeek、QwQ、Qwen 和 Qwen 2.5VL。详情请" -"参见[官方文档](https://vllm-ascend.readthedocs.io/en/v0.7.3-dev/tutorials/" -"index.html)。" +"[文档] 新增了更多模型教程,包括 DeepSeek、QwQ、Qwen 和 Qwen 2.5VL。详情请参阅[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/index.html)" -#: ../../user_guide/release_notes.md:269 +#: ../../source/user_guide/release_notes.md:1382 msgid "" -"Pin modelscope<1.23.0 on vLLM v0.7.3 to resolve: https://github.com/vllm-" -"project/vllm/pull/13807" -msgstr "" -"在 vLLM v0.7.3 上锁定 modelscope 版本低于 1.23.0,以解决:https://github." 
-"com/vllm-project/vllm/pull/13807" +"Pin modelscope<1.23.0 on vLLM v0.7.3 to resolve: " +msgstr "在 vLLM v0.7.3 上将 modelscope 版本锁定为 <1.23.0,以解决:" -#: ../../user_guide/release_notes.md:271 ../../user_guide/release_notes.md:302 -msgid "Known issues" -msgstr "已知问题" - -#: ../../user_guide/release_notes.md:272 +#: ../../source/user_guide/release_notes.md:1386 msgid "" "In [some cases](https://github.com/vllm-project/vllm-ascend/issues/324), " -"especially when the input/output is very long, the accuracy of output may " -"be incorrect. We are working on it. It'll be fixed in the next release." +"especially when the input/output is very long, the accuracy of output may" +" be incorrect. We are working on it. It'll be fixed in the next release." msgstr "" -"在[某些情况下](https://github.com/vllm-project/vllm-ascend/issues/324),特别" -"是当输入或输出非常长时,输出的准确性可能会有误。我们正在解决这个问题。将在下" -"一个版本中修复。" +"在[某些情况下](https://github.com/vllm-project/vllm-ascend/issues/324),特别是当输入/输出非常长时,输出的准确性可能不正确。我们正在解决此问题,将在下一个版本中修复。" -#: ../../user_guide/release_notes.md:273 +#: ../../source/user_guide/release_notes.md:1387 msgid "" -"Improved and reduced the garbled code in model output. But if you still hit " -"the issue, try to change the generation config value, such as " -"`temperature`, and try again. There is also a known issue shown below. Any " -"[feedback](https://github.com/vllm-project/vllm-ascend/issues/267) is " -"welcome. [#277](https://github.com/vllm-project/vllm-ascend/pull/277)" +"Improved and reduced the garbled code in model output. But if you still " +"hit the issue, try to change the generation config value, such as " +"`temperature`, and try again. There is also a known issue shown below. " +"Any [feedback](https://github.com/vllm-project/vllm-ascend/issues/267) is" +" welcome. [#277](https://github.com/vllm-project/vllm-ascend/pull/277)" msgstr "" -"改进并减少了模型输出中的乱码问题。但如果你仍然遇到该问题,请尝试更改生成配置" -"的参数,例如 `temperature`,然后再试一次。下面还列出了一个已知问题。欢迎提供" -"任何[反馈](https://github.com/vllm-project/vllm-ascend/issues/267)。[#277]" -"(https://github.com/vllm-project/vllm-ascend/pull/277)" +"已改进并减少了模型输出中的乱码问题。但如果您仍然遇到此问题,请尝试更改生成配置参数,例如 `temperature`,然后重试。下方还列出了一个已知问题。欢迎提供任何[反馈](https://github.com/vllm-project/vllm-ascend/issues/267)。 [#277](https://github.com/vllm-project/vllm-ascend/pull/277)" -#: ../../user_guide/release_notes.md:275 +#: ../../source/user_guide/release_notes.md:1389 msgid "v0.7.1rc1 - 2025.02.19" msgstr "v0.7.1rc1 - 2025.02.19" -#: ../../user_guide/release_notes.md:279 +#: ../../source/user_guide/release_notes.md:1393 msgid "" -"We are excited to announce the first release candidate of v0.7.1 for vllm-" -"ascend." -msgstr "我们很高兴地宣布 vllm-ascend v0.7.1 的第一个候选版本发布。" +"We are excited to announce the first release candidate of v0.7.1 for " +"vllm-ascend." +msgstr "我们很高兴地宣布 vllm-ascend v0.7.1 的第一个候选发布版本。" -#: ../../user_guide/release_notes.md:281 +#: ../../source/user_guide/release_notes.md:1395 msgid "" -"vLLM Ascend Plugin (vllm-ascend) is a community maintained hardware plugin " -"for running vLLM on the Ascend NPU. With this release, users can now enjoy " -"the latest features and improvements of vLLM on the Ascend NPU." +"vLLM Ascend Plugin (vllm-ascend) is a community maintained hardware " +"plugin for running vLLM on the Ascend NPU. With this release, users can " +"now enjoy the latest features and improvements of vLLM on the Ascend NPU." 
msgstr "" -"vLLM Ascend 插件(vllm-ascend)是一个由社区维护的硬件插件,用于在 Ascend " -"NPU 上运行 vLLM。通过此版本,用户现在可以在 Ascend NPU 上享受到 vLLM 的最新" -"功能和改进。" +"vLLM Ascend 插件(vllm-ascend)是一个由社区维护的硬件插件,用于在 Ascend NPU 上运行 vLLM。通过此版本,用户现在可以在 Ascend NPU 上享受 vLLM 的最新功能和改进。" -#: ../../user_guide/release_notes.md:283 +#: ../../source/user_guide/release_notes.md:1397 msgid "" -"Please follow the [official doc](https://vllm-ascend.readthedocs.io/en/" -"v0.7.1-dev) to start the journey. Note that this is a release candidate, " -"and there may be some bugs or issues. We appreciate your feedback and " -"suggestions [here](https://github.com/vllm-project/vllm-ascend/issues/19)" +"Please follow the [official " +"doc](https://docs.vllm.ai/projects/ascend/en/v0.7.1) to start the " +"journey. Note that this is a release candidate, and there may be some " +"bugs or issues. We appreciate your feedback and suggestions [this " +"issue](https://github.com/vllm-project/vllm-ascend/issues/19)" msgstr "" -"请参阅[官方文档](https://vllm-ascend.readthedocs.io/en/v0.7.1-dev)开始您的体" -"验之旅。请注意,这是一个候选发布版本,可能会有一些漏洞或问题。我们非常欢迎您" -"在[这里](https://github.com/vllm-project/vllm-ascend/issues/19)提交反馈和建" -"议。" +"请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.1)开始您的旅程。请注意,这是一个候选发布版本,可能存在一些错误或问题。我们非常感谢您的反馈和建议,请通过[此问题](https://github.com/vllm-project/vllm-ascend/issues/19)提交。" -#: ../../user_guide/release_notes.md:287 +#: ../../source/user_guide/release_notes.md:1401 msgid "" "Initial supports for Ascend NPU on vLLM. [#3](https://github.com/vllm-" "project/vllm-ascend/pull/3)" -msgstr "" -"在 vLLM 上初步支持 Ascend NPU。[#3](https://github.com/vllm-project/vllm-" -"ascend/pull/3)" +msgstr "在 vLLM 上初步支持 Ascend NPU。 [#3](https://github.com/vllm-project/vllm-ascend/pull/3)" -#: ../../user_guide/release_notes.md:288 +#: ../../source/user_guide/release_notes.md:1402 msgid "" "DeepSeek is now supported. [#88](https://github.com/vllm-project/vllm-" -"ascend/pull/88) [#68](https://github.com/vllm-project/vllm-ascend/pull/68)" -msgstr "" -"现在已支持 DeepSeek。 [#88](https://github.com/vllm-project/vllm-ascend/" -"pull/88) [#68](https://github.com/vllm-project/vllm-ascend/pull/68)" +"ascend/pull/88) [#68](https://github.com/vllm-project/vllm-" +"ascend/pull/68)" +msgstr "现已支持 DeepSeek。 [#88](https://github.com/vllm-project/vllm-ascend/pull/88) [#68](https://github.com/vllm-project/vllm-ascend/pull/68)" -#: ../../user_guide/release_notes.md:289 +#: ../../source/user_guide/release_notes.md:1403 msgid "" -"Qwen, Llama series and other popular models are also supported, you can see " -"more details in [here](https://vllm-ascend.readthedocs.io/en/latest/" -"user_guide/supported_models.html)." -msgstr "" -"Qwen、Llama 系列及其他流行的模型也受支持,更多详情可参见[这里](https://vllm-" -"ascend.readthedocs.io/en/latest/user_guide/supported_models.html)。" +"Qwen, Llama series and other popular models are also supported, you can " +"see more details in [supported_models](https://github.com/vllm-project" +"/vllm-ascend/blob/v0.7.1rc1/docs/source/user_guide/supported_models.md)." +msgstr "Qwen、Llama 系列及其他流行模型也已支持,更多详情请参阅 [supported_models](https://github.com/vllm-project/vllm-ascend/blob/v0.7.1rc1/docs/source/user_guide/supported_models.md)。" -#: ../../user_guide/release_notes.md:293 +#: ../../source/user_guide/release_notes.md:1407 msgid "" -"Added the Ascend quantization config option, the implementation will coming " -"soon. 
[#7](https://github.com/vllm-project/vllm-ascend/pull/7) [#73]" -"(https://github.com/vllm-project/vllm-ascend/pull/73)" -msgstr "" -"新增了 Ascend 量化配置选项,具体实现即将推出。[#7](https://github.com/vllm-" -"project/vllm-ascend/pull/7) [#73](https://github.com/vllm-project/vllm-" -"ascend/pull/73)" +"Added the Ascend quantization config option, the implementation will " +"coming soon. [#7](https://github.com/vllm-project/vllm-ascend/pull/7) " +"[#73](https://github.com/vllm-project/vllm-ascend/pull/73)" +msgstr "新增了 Ascend 量化配置选项,具体实现即将推出。 [#7](https://github.com/vllm-project/vllm-ascend/pull/7) [#73](https://github.com/vllm-project/vllm-ascend/pull/73)" -#: ../../user_guide/release_notes.md:294 +#: ../../source/user_guide/release_notes.md:1408 msgid "" -"Add silu_and_mul and rope ops and add mix ops into attention layer. [#18]" -"(https://github.com/vllm-project/vllm-ascend/pull/18)" -msgstr "" -"添加 silu_and_mul 和 rope 操作,并将混合操作加入到 attention 层。 [#18]" -"(https://github.com/vllm-project/vllm-ascend/pull/18)" +"Add silu_and_mul and rope ops and add mix ops into attention layer. " +"[#18](https://github.com/vllm-project/vllm-ascend/pull/18)" +msgstr "添加 silu_and_mul 和 rope 算子,并将混合算子加入注意力层。 [#18](https://github.com/vllm-project/vllm-ascend/pull/18)" -#: ../../user_guide/release_notes.md:298 +#: ../../source/user_guide/release_notes.md:1412 msgid "" -"[CI] Enable Ascend CI to actively monitor and improve quality for vLLM on " -"Ascend. [#3](https://github.com/vllm-project/vllm-ascend/pull/3)" -msgstr "" -"[CI] 启用 Ascend CI,主动监测并提升 vLLM 在 Ascend 上的质量。[#3](https://" -"github.com/vllm-project/vllm-ascend/pull/3)" +"[CI] Enable Ascend CI to actively monitor and improve quality for vLLM on" +" Ascend. [#3](https://github.com/vllm-project/vllm-ascend/pull/3)" +msgstr "[CI] 启用 Ascend CI,以主动监控并提升 vLLM 在 Ascend 上的质量。 [#3](https://github.com/vllm-project/vllm-ascend/pull/3)" -#: ../../user_guide/release_notes.md:299 +#: ../../source/user_guide/release_notes.md:1413 msgid "" "[Docker] Add vllm-ascend container image [#64](https://github.com/vllm-" "project/vllm-ascend/pull/64)" -msgstr "" -"[Docker] 添加 vllm-ascend 容器镜像 [#64](https://github.com/vllm-project/" -"vllm-ascend/pull/64)" +msgstr "[Docker] 添加 vllm-ascend 容器镜像 [#64](https://github.com/vllm-project/vllm-ascend/pull/64)" -#: ../../user_guide/release_notes.md:300 +#: ../../source/user_guide/release_notes.md:1414 msgid "" -"[Docs] Add a [live doc](https://vllm-ascend.readthedocs.org) [#55](https://" -"github.com/vllm-project/vllm-ascend/pull/55)" -msgstr "" -"[文档] 添加了一个 [在线文档](https://vllm-ascend.readthedocs.org) [#55]" -"(https://github.com/vllm-project/vllm-ascend/pull/55)" +"[Docs] Add a [live doc](https://vllm-ascend.readthedocs.org) " +"[#55](https://github.com/vllm-project/vllm-ascend/pull/55)" +msgstr "[文档] 添加 [在线文档](https://vllm-ascend.readthedocs.org) [#55](https://github.com/vllm-project/vllm-ascend/pull/55)" -#: ../../user_guide/release_notes.md:304 +#: ../../source/user_guide/release_notes.md:1418 msgid "" "This release relies on an unreleased torch_npu version. It has been " -"installed within official container image already. Please [install](https://" -"vllm-ascend.readthedocs.io/en/v0.7.1rc1/installation.html) it manually if " -"you are using non-container environment." -msgstr "" -"此版本依赖于尚未发布的 torch_npu 版本。该版本已集成在官方容器镜像中。如果您" -"使用的是非容器环境,请[手动安装](https://vllm-ascend.readthedocs.io/en/" -"v0.7.1rc1/installation.html)。" +"installed within official container image already. 
Please " +"[install](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.7.1rc1/docs/source/installation.md) it manually if you are" +" using non-container environment." +msgstr "此版本依赖于一个尚未发布的 torch_npu 版本。该版本已预装在官方容器镜像中。如果您使用的是非容器环境,请[手动安装](https://github.com/vllm-project/vllm-ascend/blob/v0.7.1rc1/docs/source/installation.md)。" -#: ../../user_guide/release_notes.md:305 +#: ../../source/user_guide/release_notes.md:1419 msgid "" "There are logs like `No platform detected, vLLM is running on " "UnspecifiedPlatform` or `Failed to import from vllm._C with " -"ModuleNotFoundError(\"No module named 'vllm._C'\")` shown when running vllm-" -"ascend. It actually doesn't affect any functionality and performance. You " -"can just ignore it. And it has been fixed in this [PR](https://github.com/" -"vllm-project/vllm/pull/12432) which will be included in v0.7.3 soon." -msgstr "" -"在运行 vllm-ascend 时,会显示类似 `No platform detected, vLLM is running on " -"UnspecifiedPlatform` 或 `Failed to import from vllm._C with " -"ModuleNotFoundError(\"No module named 'vllm._C'\")` 的日志。这实际上不会影响" -"任何功能和性能,你可以直接忽略它。这个问题已在此 [PR](https://github.com/" -"vllm-project/vllm/pull/12432) 中修复,并很快会在 v0.7.3 版本中包含。" +"ModuleNotFoundError(\"No module named 'vllm._C'\")` shown when running " +"vllm-ascend. It actually doesn't affect any functionality and " +"performance. You can just ignore it. And it has been fixed in this " +"[PR](https://github.com/vllm-project/vllm/pull/12432) which will be " +"included in v0.7.3 soon." +msgstr "运行 vllm-ascend 时,可能会显示类似 `No platform detected, vLLM is running on UnspecifiedPlatform` 或 `Failed to import from vllm._C with ModuleNotFoundError(\"No module named 'vllm._C'\")` 的日志。这实际上不影响任何功能或性能,您可以忽略它。此问题已在此 [PR](https://github.com/vllm-project/vllm/pull/12432) 中修复,并将很快包含在 v0.7.3 版本中。" -#: ../../user_guide/release_notes.md:306 +#: ../../source/user_guide/release_notes.md:1420 msgid "" "There are logs like `# CPU blocks: 35064, # CPU blocks: 2730` shown when " -"running vllm-ascend which should be `# NPU blocks:` . It actually doesn't " -"affect any functionality and performance. You can just ignore it. And it " -"has been fixed in this [PR](https://github.com/vllm-project/vllm/" -"pull/13378) which will be included in v0.7.3 soon." -msgstr "" -"在运行 vllm-ascend 时,会显示类似 `# CPU blocks: 35064, # CPU blocks: 2730` " -"的日志,实际应该为 `# NPU blocks:`。这实际上不会影响任何功能和性能,你可以忽" -"略它。该问题已在这个 [PR](https://github.com/vllm-project/vllm/pull/13378) " -"中修复,并将在 v0.7.3 版本中包含。" +"running vllm-ascend which should be `# NPU blocks:` . It actually doesn't" +" affect any functionality and performance. You can just ignore it. And it" +" has been fixed in this [PR](https://github.com/vllm-" +"project/vllm/pull/13378) which will be included in v0.7.3 soon." +msgstr "运行 vllm-ascend 时,可能会显示类似 `# CPU blocks: 35064, # CPU blocks: 2730` 的日志,实际应为 `# NPU blocks:`。这实际上不影响任何功能或性能,您可以忽略它。此问题已在此 [PR](https://github.com/vllm-project/vllm/pull/13378) 中修复,并将很快包含在 v0.7.3 版本中。" + +#~ msgid "Known issues" +#~ msgstr "已知问题" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po index 040132f9..72635b23 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po @@ -3,28 +3,27 @@ # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/support_matrix/index.md:5 +#: ../../source/user_guide/support_matrix/index.md:5 msgid "Support Matrix" msgstr "支持矩阵" -#: ../../user_guide/support_matrix/index.md:1 -msgid "Features and models" +#: ../../source/user_guide/support_matrix/index.md:1 +msgid "Features and Models" msgstr "特性与模型" -#: ../../user_guide/support_matrix/index.md:3 -msgid "This section provides a detailed supported matrix by vLLM Ascend." -msgstr "本节提供了 vLLM Ascend 的详细支持矩阵。" +#: ../../source/user_guide/support_matrix/index.md:3 +msgid "This section provides a detailed matrix supported by vLLM Ascend." +msgstr "本节提供了 vLLM Ascend 支持的详细矩阵。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po index b0bfbbb2..0b8e5403 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po @@ -4,261 +4,297 @@ # package. # FIRST AUTHOR , 2025. # -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/support_matrix/supported_features.md:1 -msgid "Feature Support" -msgstr "功能支持" +#: ../../source/user_guide/support_matrix/supported_features.md:1 +msgid "Supported Features" +msgstr "支持的功能" -#: ../../user_guide/support_matrix/supported_features.md:3 +#: ../../source/user_guide/support_matrix/supported_features.md:3 msgid "" -"The feature support principle of vLLM Ascend is: **aligned with the vLLM**. " -"We are also actively collaborating with the community to accelerate support." -msgstr "vLLM Ascend 的特性支持原则是:**与 vLLM 保持一致**。我们也在积极与社区合作,加快支持进度。" +"The feature support principle of vLLM Ascend is: **aligned with vLLM**. " +"We are also actively collaborating with the community to accelerate " +"support." +msgstr "vLLM Ascend 的功能支持原则是:**与 vLLM 保持一致**。我们也在积极与社区合作,以加快支持进度。" -#: ../../user_guide/support_matrix/supported_features.md:5 +#: ../../source/user_guide/support_matrix/supported_features.md:5 +msgid "Functional call: " +msgstr "函数调用:" + +#: ../../source/user_guide/support_matrix/supported_features.md:7 msgid "" -"You can check the [support status of vLLM V1 Engine][v1_user_guide]. Below " -"is the feature support status of vLLM Ascend:" -msgstr "你可以查看 [vLLM V1 引擎的支持状态][v1_user_guide]。下面是 vLLM Ascend 的功能支持情况:" +"You can check the [support status of vLLM V1 Engine][v1_user_guide]. 
" +"Below is the feature support status of vLLM Ascend:" +msgstr "您可以查看 [vLLM V1 引擎的支持状态][v1_user_guide]。以下是 vLLM Ascend 的功能支持状态:" -#: ../../user_guide/support_matrix/supported_features.md +#: ../../source/user_guide/support_matrix/supported_features.md msgid "Feature" -msgstr "特性" +msgstr "功能" -#: ../../user_guide/support_matrix/supported_features.md -msgid "vLLM V0 Engine" -msgstr "vLLM V0 引擎" +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Status" +msgstr "状态" -#: ../../user_guide/support_matrix/supported_features.md -msgid "vLLM V1 Engine" -msgstr "vLLM V1 引擎" - -#: ../../user_guide/support_matrix/supported_features.md +#: ../../source/user_guide/support_matrix/supported_features.md msgid "Next Step" -msgstr "下一步" +msgstr "后续步骤" -#: ../../user_guide/support_matrix/supported_features.md +#: ../../source/user_guide/support_matrix/supported_features.md msgid "Chunked Prefill" msgstr "分块预填充" -#: ../../user_guide/support_matrix/supported_features.md +#: ../../source/user_guide/support_matrix/supported_features.md msgid "🟢 Functional" -msgstr "🟢 功能性" +msgstr "🟢 功能完备" -#: ../../user_guide/support_matrix/supported_features.md -msgid "Functional, see detail note: [Chunked Prefill][cp]" -msgstr "功能性,详见说明:[分块预填充][cp]" +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Functional, see detailed note: [Chunked Prefill][cp]" +msgstr "功能完备,详见说明:[分块预填充][cp]" -#: ../../user_guide/support_matrix/supported_features.md +#: ../../source/user_guide/support_matrix/supported_features.md msgid "Automatic Prefix Caching" msgstr "自动前缀缓存" -#: ../../user_guide/support_matrix/supported_features.md -msgid "Functional, see detail note: [vllm-ascend#732][apc]" -msgstr "可用,请参见详细说明:[vllm-ascend#732][apc]" +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Functional, see detailed note: [vllm-ascend#732][apc]" +msgstr "功能完备,详见说明:[vllm-ascend#732][apc]" -#: ../../user_guide/support_matrix/supported_features.md +#: ../../source/user_guide/support_matrix/supported_features.md msgid "LoRA" msgstr "LoRA" -#: ../../user_guide/support_matrix/supported_features.md -msgid "[vllm-ascend#396][multilora], [vllm-ascend#893][v1 multilora]" -msgstr "[vllm-ascend#396][multilora],[vllm-ascend#893][v1 multilora]" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Prompt adapter" -msgstr "提示适配器" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "🔴 No plan" -msgstr "🔴 无计划" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "This feature has been deprecated by vllm." -msgstr "此功能已被 vllm 弃用。" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Speculative decoding" -msgstr "猜测式解码" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Basic support" -msgstr "基础支持" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Pooling" -msgstr "池化" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "🟡 Planned" -msgstr "🟡 计划中" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "CI needed and adapting more models; V1 support rely on vLLM support." 
-msgstr "需要持续集成(CI)并适配更多模型;V1 的支持依赖于 vLLM 的支持。" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Enc-dec" -msgstr "Enc-dec(编码-解码)" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "🔴 NO plan" -msgstr "🔴 没有计划" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Plan in 2025.06.30" -msgstr "2025.06.30 的计划" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Multi Modality" -msgstr "多模态" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "[Tutorial][multimodal], optimizing and adapting more models" -msgstr "[教程][multimodal],优化和适配更多模型" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "LogProbs" -msgstr "LogProbs" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "CI needed" -msgstr "需要持续集成(CI)" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Prompt logProbs" -msgstr "提示 logProbs" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Async output" -msgstr "异步输出" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Multi step scheduler" -msgstr "多步调度器" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "🔴 Deprecated" -msgstr "🔴 已弃用" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "[vllm#8779][v1_rfc], replaced by [vLLM V1 Scheduler][v1_scheduler]" -msgstr "[vllm#8779][v1_rfc],已被 [vLLM V1 调度器][v1_scheduler] 替代" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Best of" -msgstr "精选" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "[vllm#13361][best_of], CI needed" -msgstr "[vllm#13361][best_of],需要持续集成(CI)" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Beam search" -msgstr "束搜索" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Guided Decoding" -msgstr "引导解码" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "[vllm-ascend#177][guided_decoding]" -msgstr "[vllm-ascend#177][guided_decoding]" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Tensor Parallel" -msgstr "张量并行" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Pipeline Parallel" -msgstr "流水线并行" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Expert Parallel" -msgstr "专家并行" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "CI needed; No plan on V0 support" -msgstr "需要持续集成;没有支持V0的计划" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Data Parallel" -msgstr "数据并行" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "CI needed; No plan on V0 support" -msgstr "需要 CI;暂无 V0 支持计划" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Prefill Decode Disaggregation" -msgstr "预填充 解码 拆分" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "1P1D available, working on xPyD and V1 support." 
-msgstr "1P1D 已可用,正在开发 xPyD 和 V1 支持。" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Quantization" -msgstr "量化" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "W8A8 available, CI needed; working on more quantization method support" -msgstr "W8A8 已可用,需要持续集成(CI);正在开发对更多量化方法的支持。" - -#: ../../user_guide/support_matrix/supported_features.md -msgid "Graph Mode" -msgstr "图模式" - -#: ../../user_guide/support_matrix/supported_features.md +#: ../../source/user_guide/support_matrix/supported_features.md msgid "🔵 Experimental" msgstr "🔵 实验性" -#: ../../user_guide/support_matrix/supported_features.md -msgid "Experimental, see detail note: [vllm-ascend#767][graph_mode]" -msgstr "实验性功能,详见说明:[vllm-ascend#767][graph_mode]" +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Functional, see detailed note: [LoRA][LoRA]" +msgstr "功能完备,详见说明:[LoRA][LoRA]" -#: ../../user_guide/support_matrix/supported_features.md +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Speculative decoding" +msgstr "推测解码" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Basic support" +msgstr "基础支持" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Pooling" +msgstr "池化" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "CI needed to adapt to more models; V1 support relies on vLLM support." +msgstr "需要 CI 以适配更多模型;V1 支持依赖于 vLLM 的支持。" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Enc-dec" +msgstr "编码器-解码器" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "🟡 Planned" +msgstr "🟡 计划中" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "vLLM should support this feature first." +msgstr "vLLM 需要首先支持此功能。" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Multi Modality" +msgstr "多模态" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "[Multi Modality][multimodal], optimizing and adapting more models" +msgstr "[多模态][multimodal],优化和适配更多模型" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "LogProbs" +msgstr "LogProbs" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "CI needed" +msgstr "需要 CI" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Prompt logProbs" +msgstr "提示词 LogProbs" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Async output" +msgstr "异步输出" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Beam search" +msgstr "束搜索" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Guided Decoding" +msgstr "引导解码" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "[vllm-ascend#177][guided_decoding]" +msgstr "[vllm-ascend#177][guided_decoding]" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Tensor Parallel" +msgstr "张量并行" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Make TP >4 work with graph mode." +msgstr "使 TP >4 能在图模式下工作。" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Pipeline Parallel" +msgstr "流水线并行" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Write official guide and tutorial." 
+msgstr "编写官方指南和教程。" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Expert Parallel" +msgstr "专家并行" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Support dynamic EPLB." +msgstr "支持动态 EPLB。" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Data Parallel" +msgstr "数据并行" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Data Parallel support for Qwen3 MoE." +msgstr "为 Qwen3 MoE 提供数据并行支持。" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Prefill Decode Disaggregation" +msgstr "预填充解码分离" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Functional, xPyD is supported." +msgstr "功能完备,支持 xPyD。" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Quantization" +msgstr "量化" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "W8A8 available; working on more quantization method support (W4A8, etc)" +msgstr "W8A8 已可用;正在开发对更多量化方法(如 W4A8 等)的支持" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Graph Mode" +msgstr "图模式" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Functional, see detailed note: [Graph Mode][graph_mode]" +msgstr "功能完备,详见说明:[图模式][graph_mode]" + +#: ../../source/user_guide/support_matrix/supported_features.md msgid "Sleep Mode" -msgstr "睡眠模式" +msgstr "休眠模式" -#: ../../user_guide/support_matrix/supported_features.md -msgid "level=1 available, CI needed, working on V1 support" -msgstr "level=1 可用,需要CI,正在开发 V1 支持" +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Functional, see detailed note: [Sleep Mode][sleep_mode]" +msgstr "功能完备,详见说明:[休眠模式][sleep_mode]" -#: ../../user_guide/support_matrix/supported_features.md:33 +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Context Parallel" +msgstr "上下文并行" + +#: ../../source/user_guide/support_matrix/supported_features.md +msgid "Functional, see detailed note: [Context Parallel][context_parallel]" +msgstr "功能完备,详见说明:[上下文并行][context_parallel]" + +#: ../../source/user_guide/support_matrix/supported_features.md:33 msgid "🟢 Functional: Fully operational, with ongoing optimizations." -msgstr "🟢 功能性:完全可用,正在持续优化中。" +msgstr "🟢 功能完备:完全可用,正在持续优化中。" -#: ../../user_guide/support_matrix/supported_features.md:34 -msgid "" -"🔵 Experimental: Experimental support, interfaces and functions may change." -msgstr "🔵 实验性:实验性支持,接口和功能可能会发生变化。" +#: ../../source/user_guide/support_matrix/supported_features.md:34 +msgid "🔵 Experimental: Experimental support, interfaces and functions may change." +msgstr "🔵 实验性:实验性支持,接口和功能可能发生变化。" -#: ../../user_guide/support_matrix/supported_features.md:35 +#: ../../source/user_guide/support_matrix/supported_features.md:35 msgid "🚧 WIP: Under active development, will be supported soon." -msgstr "🚧 WIP:正在积极开发中,很快将会支持。" +msgstr "🚧 开发中:正在积极开发,即将支持。" -#: ../../user_guide/support_matrix/supported_features.md:36 +#: ../../source/user_guide/support_matrix/supported_features.md:36 msgid "" "🟡 Planned: Scheduled for future implementation (some may have open " "PRs/RFCs)." -msgstr "🟡 计划中:已安排将来实现(其中一些可能已有开放的PR/RFC)。" +msgstr "🟡 计划中:计划在未来实现(部分可能已有开放的 PR/RFC)。" -#: ../../user_guide/support_matrix/supported_features.md:37 -msgid "🔴 NO plan / Deprecated: No plan for V0 or deprecated by vLLM v1." 
-msgstr "🔴 没有计划 / 已弃用:V0 没有计划或已被 vLLM v1 弃用。" +#: ../../source/user_guide/support_matrix/supported_features.md:37 +msgid "🔴 NO plan/Deprecated: No plan or deprecated by vLLM." +msgstr "🔴 无计划/已弃用:暂无计划或已被 vLLM 弃用。" + +#~ msgid "Feature Support" +#~ msgstr "功能支持" + +#~ msgid "vLLM V0 Engine" +#~ msgstr "vLLM V0 引擎" + +#~ msgid "vLLM V1 Engine" +#~ msgstr "vLLM V1 引擎" + +#~ msgid "[vllm-ascend#396][multilora], [vllm-ascend#893][v1 multilora]" +#~ msgstr "[vllm-ascend#396][multilora], [vllm-ascend#893][v1 multilora]" + +#~ msgid "Prompt adapter" +#~ msgstr "提示词适配器" + +#~ msgid "🔴 No plan" +#~ msgstr "🔴 无计划" + +#~ msgid "This feature has been deprecated by vllm." +#~ msgstr "此功能已被 vllm 弃用。" + +#~ msgid "🔴 NO plan" +#~ msgstr "🔴 无计划" + +#~ msgid "Plan in 2025.06.30" +#~ msgstr "计划于 2025.06.30" + +#~ msgid "Multi step scheduler" +#~ msgstr "多步调度器" + +#~ msgid "🔴 Deprecated" +#~ msgstr "🔴 已弃用" + +#~ msgid "[vllm#8779][v1_rfc], replaced by [vLLM V1 Scheduler][v1_scheduler]" +#~ msgstr "[vllm#8779][v1_rfc],已被 [vLLM V1 调度器][v1_scheduler] 取代" + +#~ msgid "Best of" +#~ msgstr "最佳结果" + +#~ msgid "[vllm#13361][best_of], CI needed" +#~ msgstr "[vllm#13361][best_of],需要 CI" + +#~ msgid "CI needed; No plan on V0 support" +#~ msgstr "需要 CI;暂无 V0 支持计划" + +#~ msgid "CI needed; No plan on V0 support" +#~ msgstr "需要 CI;暂无 V0 支持计划" + +#~ msgid "1P1D available, working on xPyD and V1 support." +#~ msgstr "1P1D 已可用,正在开发 xPyD 和 V1 支持。" + +#~ msgid "Experimental, see detail note: [vllm-ascend#767][graph_mode]" +#~ msgstr "实验性,详见说明:[vllm-ascend#767][graph_mode]" + +#~ msgid "level=1 available, CI needed, working on V1 support" +#~ msgstr "level=1 已可用,需要 CI,正在开发 V1 支持" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po index e787758a..25ca9cfe 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po @@ -4,187 +4,620 @@ # package. # FIRST AUTHOR , 2025. 
# -#, fuzzy msgid "" msgstr "" -"Project-Id-Version: vllm-ascend\n" +"Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2025-07-18 09:01+0800\n" +"POT-Creation-Date: 2026-04-14 09:08+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" -"Language-Team: zh_CN \n" "Language: zh_CN\n" +"Language-Team: zh_CN \n" +"Plural-Forms: nplurals=1; plural=0;\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=1; plural=0;\n" -"Generated-By: Babel 2.17.0\n" +"Generated-By: Babel 2.18.0\n" -#: ../../user_guide/support_matrix/supported_models.md:1 -msgid "Model Support" -msgstr "模型支持" +#: ../../source/user_guide/support_matrix/supported_models.md:1 +msgid "Supported Models" +msgstr "支持的模型" -#: ../../user_guide/support_matrix/supported_models.md:3 -msgid "Text-only Language Models" +#: ../../source/user_guide/support_matrix/supported_models.md:3 +msgid "" +"Get the latest info here: " +msgstr "获取最新信息请访问:" + +#: ../../source/user_guide/support_matrix/supported_models.md:5 +msgid "**Legend Description**:" +msgstr "**图例说明**:" + +#: ../../source/user_guide/support_matrix/supported_models.md:7 +msgid "✅ = Supported model/feature" +msgstr "✅ = 支持的模型/功能" + +#: ../../source/user_guide/support_matrix/supported_models.md:8 +msgid "🔵 = Experimental supported model/feature" +msgstr "🔵 = 实验性支持的模型/功能" + +#: ../../source/user_guide/support_matrix/supported_models.md:9 +msgid "❌ = Not supported model/feature" +msgstr "❌ = 不支持的模型/功能" + +#: ../../source/user_guide/support_matrix/supported_models.md:10 +msgid "🟡 = Not tested or verified" +msgstr "🟡 = 未测试或未验证" + +#: ../../source/user_guide/support_matrix/supported_models.md:12 +msgid "Text-Only Language Models" msgstr "纯文本语言模型" -#: ../../user_guide/support_matrix/supported_models.md:5 -#: ../../user_guide/support_matrix/supported_models.md:38 +#: ../../source/user_guide/support_matrix/supported_models.md:14 +#: ../../source/user_guide/support_matrix/supported_models.md:74 msgid "Generative Models" msgstr "生成模型" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md:16 +#: ../../source/user_guide/support_matrix/supported_models.md:76 +msgid "Core Supported Models" +msgstr "核心支持的模型" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "Model" msgstr "模型" -#: ../../user_guide/support_matrix/supported_models.md -msgid "Supported" -msgstr "支持" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Support" +msgstr "支持状态" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md msgid "Note" -msgstr "注释" +msgstr "备注" -#: ../../user_guide/support_matrix/supported_models.md -msgid "DeepSeek v3" -msgstr "DeepSeek v3" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "BF16" +msgstr "BF16" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Supported Hardware" +msgstr "支持的硬件" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "W8A8" +msgstr "W8A8" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Chunked Prefill" +msgstr "分块预填充" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Automatic Prefix Cache" +msgstr "自动前缀缓存" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "LoRA" +msgstr "LoRA" + +#: 
../../source/user_guide/support_matrix/supported_models.md +msgid "Speculative Decoding" +msgstr "推测解码" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Async Scheduling" +msgstr "异步调度" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Tensor Parallel" +msgstr "张量并行" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Pipeline Parallel" +msgstr "流水线并行" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Expert Parallel" +msgstr "专家并行" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Data Parallel" +msgstr "数据并行" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Prefill-decode Disaggregation" +msgstr "预填充-解码解耦" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Piecewise AclGraph" +msgstr "分段 AclGraph" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Fullgraph AclGraph" +msgstr "全图 AclGraph" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "max-model-len" +msgstr "最大模型长度" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "MLP Weight Prefetch" +msgstr "MLP 权重预取" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Doc" +msgstr "文档" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "DeepSeek V3/3.1" +msgstr "DeepSeek V3/3.1" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "✅" msgstr "✅" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "A2/A3" +msgstr "A2/A3" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "240k" +msgstr "240k" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[DeepSeek-V3.1](../../tutorials/models/DeepSeek-V3.1.md)" +msgstr "[DeepSeek-V3.1](../../tutorials/models/DeepSeek-V3.1.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "DeepSeek V3.2" +msgstr "DeepSeek V3.2" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "🔵" +msgstr "🔵" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "160k" +msgstr "160k" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[DeepSeek-V3.2](../../tutorials/models/DeepSeek-V3.2.md)" +msgstr "[DeepSeek-V3.2](../../tutorials/models/DeepSeek-V3.2.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "DeepSeek R1" msgstr "DeepSeek R1" -#: ../../user_guide/support_matrix/supported_models.md -msgid "DeepSeek Distill (Qwen/LLama)" -msgstr "DeepSeek 精炼(Qwen/LLama)" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "128k" +msgstr "128k" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[DeepSeek-R1](../../tutorials/models/DeepSeek-R1.md)" +msgstr "[DeepSeek-R1](../../tutorials/models/DeepSeek-R1.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "Qwen3" msgstr "Qwen3" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3-Dense](../../tutorials/models/Qwen3-Dense.md)" +msgstr "[Qwen3-Dense](../../tutorials/models/Qwen3-Dense.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-Coder" +msgstr "Qwen3-Coder" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "" +"[Qwen3-Coder-30B-A3B 
tutorial](../../tutorials/models/Qwen3-Coder-30B-" +"A3B.md)" +msgstr "[Qwen3-Coder-30B-A3B 教程](../../tutorials/models/Qwen3-Coder-30B-A3B.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "Qwen3-Moe" msgstr "Qwen3-Moe" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "256k" +msgstr "256k" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3-235B-A22B](../../tutorials/models/Qwen3-235B-A22B.md)" +msgstr "[Qwen3-235B-A22B](../../tutorials/models/Qwen3-235B-A22B.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-Next" +msgstr "Qwen3-Next" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3-Next](../../tutorials/models/Qwen3-Next.md)" +msgstr "[Qwen3-Next](../../tutorials/models/Qwen3-Next.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "Qwen2.5" msgstr "Qwen2.5" -#: ../../user_guide/support_matrix/supported_models.md -msgid "QwQ-32B" -msgstr "QwQ-32B" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen2.5-7B](../../tutorials/models/Qwen2.5-7B.md)" +msgstr "[Qwen2.5-7B](../../tutorials/models/Qwen2.5-7B.md)" -#: ../../user_guide/support_matrix/supported_models.md -msgid "LLama3.1/3.2" -msgstr "LLama3.1/3.2" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "GLM-4.x" +msgstr "GLM-4.x" -#: ../../user_guide/support_matrix/supported_models.md -msgid "Internlm" -msgstr "Internlm" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "198k" +msgstr "198k" -#: ../../user_guide/support_matrix/supported_models.md -msgid "Baichuan" -msgstr "百川" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[GLM-4.x](../../tutorials/models/GLM4.x.md)" +msgstr "[GLM-4.x](../../tutorials/models/GLM4.x.md)" -#: ../../user_guide/support_matrix/supported_models.md -msgid "Phi-4-mini" -msgstr "Phi-4-mini" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "GLM-5" +msgstr "GLM-5" -#: ../../user_guide/support_matrix/supported_models.md -msgid "MiniCPM" -msgstr "MiniCPM" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[GLM-5](../../tutorials/models/GLM5.md)" +msgstr "[GLM-5](../../tutorials/models/GLM5.md)" -#: ../../user_guide/support_matrix/supported_models.md -msgid "MiniCPM3" -msgstr "MiniCPM3" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Kimi-K2-Thinking" +msgstr "Kimi-K2-Thinking" -#: ../../user_guide/support_matrix/supported_models.md -msgid "LLama4" -msgstr "LLama4" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Kimi-K2-Thinking](../../tutorials/models/Kimi-K2-Thinking.md)" +msgstr "[Kimi-K2-Thinking](../../tutorials/models/Kimi-K2-Thinking.md)" -#: ../../user_guide/support_matrix/supported_models.md -msgid "Mistral" -msgstr "Mistral" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "MiniMax-M2.5" +msgstr "MiniMax-M2.5" -#: ../../user_guide/support_matrix/supported_models.md -msgid "Need test" -msgstr "需要测试" - -#: ../../user_guide/support_matrix/supported_models.md -msgid "DeepSeek v2.5" -msgstr "DeepSeek v2.5" - -#: ../../user_guide/support_matrix/supported_models.md -msgid "Gemma-2" -msgstr "Gemma-2" - -#: ../../user_guide/support_matrix/supported_models.md -msgid "Mllama" -msgstr "Mllama" - -#: ../../user_guide/support_matrix/supported_models.md -msgid "Gemma-3" -msgstr "Gemma-3" - -#: 
../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md msgid "❌" msgstr "❌" -#: ../../user_guide/support_matrix/supported_models.md -msgid "[#496](https://github.com/vllm-project/vllm-ascend/issues/496)" -msgstr "[#496](https://github.com/vllm-project/vllm-ascend/issues/496)" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "🟡" +msgstr "🟡" -#: ../../user_guide/support_matrix/supported_models.md -msgid "ChatGLM" -msgstr "ChatGLM" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "192k" +msgstr "192k" -#: ../../user_guide/support_matrix/supported_models.md -msgid "[#554](https://github.com/vllm-project/vllm-ascend/issues/554)" -msgstr "[#554](https://github.com/vllm-project/vllm-ascend/issues/554)" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[MiniMax-M2.5](../../tutorials/models/MiniMax-M2.md)" +msgstr "[MiniMax-M2.5](../../tutorials/models/MiniMax-M2.md)" -#: ../../user_guide/support_matrix/supported_models.md:29 +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "MiniMax-M2.7" +msgstr "MiniMax-M2.7" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[MiniMax-M2.7](../../tutorials/models/MiniMax-M2.md)" +msgstr "[MiniMax-M2.7](../../tutorials/models/MiniMax-M2.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md:34 +#: ../../source/user_guide/support_matrix/supported_models.md:88 +msgid "Extended Compatible Models" +msgstr "扩展兼容模型" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "DeepSeek Distill (Qwen/Llama)" +msgstr "DeepSeek Distill (Qwen/Llama)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-based" +msgstr "基于 Qwen3" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen2" +msgstr "Qwen2" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen2-based" +msgstr "基于 Qwen2" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "QwQ-32B" +msgstr "QwQ-32B" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Llama2/3/3.1/3.2" +msgstr "Llama2/3/3.1/3.2" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Internlm" +msgstr "Internlm" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[#1962](https://github.com/vllm-project/vllm-ascend/issues/1962)" +msgstr "[#1962](https://github.com/vllm-project/vllm-ascend/issues/1962)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Baichuan" +msgstr "Baichuan" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Baichuan2" +msgstr "Baichuan2" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Phi-4-mini" +msgstr "Phi-4-mini" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "MiniCPM" +msgstr "MiniCPM" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "MiniCPM3" +msgstr "MiniCPM3" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Ernie4.5" +msgstr "Ernie4.5" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Ernie4.5-Moe" +msgstr "Ernie4.5-Moe" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Gemma-2" +msgstr "Gemma-2" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Gemma-3" +msgstr "Gemma-3" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Phi-3/4" +msgstr "Phi-3/4" + +#: 
../../source/user_guide/support_matrix/supported_models.md +msgid "Mistral/Mistral-Instruct" +msgstr "Mistral/Mistral-Instruct" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "DeepSeek V2.5" +msgstr "DeepSeek V2.5" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Need test" +msgstr "需要测试" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Mllama" +msgstr "Mllama" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "MiniMax-Text" +msgstr "MiniMax-Text" + +#: ../../source/user_guide/support_matrix/supported_models.md:60 msgid "Pooling Models" msgstr "池化模型" -#: ../../user_guide/support_matrix/supported_models.md -msgid "XLM-RoBERTa-based" -msgstr "基于XLM-RoBERTa" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-Embedding" +msgstr "Qwen3-Embedding" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3_embedding](../../tutorials/models/Qwen3_embedding.md)" +msgstr "[Qwen3_embedding](../../tutorials/models/Qwen3_embedding.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-VL-Embedding" +msgstr "Qwen3-VL-Embedding" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3-VL-Embedding](../../tutorials/models/Qwen3-VL-Embedding.md)" +msgstr "[Qwen3-VL-Embedding](../../tutorials/models/Qwen3-VL-Embedding.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-Reranker" +msgstr "Qwen3-Reranker" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3_reranker](../../tutorials/models/Qwen3_reranker.md)" +msgstr "[Qwen3_reranker](../../tutorials/models/Qwen3_reranker.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-VL-Reranker" +msgstr "Qwen3-VL-Reranker" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3-VL-Reranker](../../tutorials/models/Qwen3-VL-Reranker.md)" +msgstr "[Qwen3-VL-Reranker](../../tutorials/models/Qwen3-VL-Reranker.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "Molmo" msgstr "Molmo" -#: ../../user_guide/support_matrix/supported_models.md:36 +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[1942](https://github.com/vllm-project/vllm-ascend/issues/1942)" +msgstr "[1942](https://github.com/vllm-project/vllm-ascend/issues/1942)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "XLM-RoBERTa-based" +msgstr "基于XLM-RoBERTa" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Bert" +msgstr "Bert" + +#: ../../source/user_guide/support_matrix/supported_models.md:72 msgid "Multimodal Language Models" msgstr "多模态语言模型" -#: ../../user_guide/support_matrix/supported_models.md -msgid "Qwen2-VL" -msgstr "Qwen2-VL" - -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md msgid "Qwen2.5-VL" msgstr "Qwen2.5-VL" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "30k" +msgstr "30k" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen-VL-Dense](../../tutorials/models/Qwen-VL-Dense.md)" +msgstr "[Qwen-VL-Dense](../../tutorials/models/Qwen-VL-Dense.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-VL" +msgstr "Qwen3-VL" + +#: 
../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-VL-MOE" +msgstr "Qwen3-VL-MOE" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3-VL-MOE](../../tutorials/models/Qwen3-VL-235B-A22B-Instruct.md)" +msgstr "[Qwen3-VL-MOE](../../tutorials/models/Qwen3-VL-235B-A22B-Instruct.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3.5-397B-A17B" +msgstr "Qwen3.5-397B-A17B" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "1010000" +msgstr "1010000" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3.5-397B-A17B](../../tutorials/models/Qwen3.5-397B-A17B.md)" +msgstr "[Qwen3.5-397B-A17B](../../tutorials/models/Qwen3.5-397B-A17B.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3.5-27B" +msgstr "Qwen3.5-27B" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen3.5-27B](../../tutorials/models/Qwen3.5-27B.md)" +msgstr "[Qwen3.5-27B](../../tutorials/models/Qwen3.5-27B.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-Omni-30B-A3B-Thinking" +msgstr "Qwen3-Omni-30B-A3B-Thinking" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "" +"[Qwen3-Omni-30B-A3B-Thinking](../../tutorials/models/Qwen3-Omni-30B-A3B-" +"Thinking.md)" +msgstr "[Qwen3-Omni-30B-A3B-Thinking](../../tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen2.5-Omni" +msgstr "Qwen2.5-Omni" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[Qwen2.5-Omni](../../tutorials/models/Qwen2.5-Omni.md)" +msgstr "[Qwen2.5-Omni](../../tutorials/models/Qwen2.5-Omni.md)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen2-VL" +msgstr "Qwen2-VL" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen3-Omni" +msgstr "Qwen3-Omni" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "QVQ" +msgstr "QVQ" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Qwen2-Audio" +msgstr "Qwen2-Audio" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Aria" +msgstr "Aria" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "LLaVA-Next" msgstr "LLaVA-Next" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md msgid "LLaVA-Next-Video" msgstr "LLaVA-Next-Video" -#: ../../user_guide/support_matrix/supported_models.md -msgid "Phi-3-Vison/Phi-3.5-Vison" -msgstr "Phi-3-Vison/Phi-3.5-Vison" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "MiniCPM-V" +msgstr "MiniCPM-V" -#: ../../user_guide/support_matrix/supported_models.md -msgid "GLM-4v" -msgstr "GLM-4v" +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Mistral3" +msgstr "Mistral3" -#: ../../user_guide/support_matrix/supported_models.md +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Phi-3-Vision/Phi-3.5-Vision" +msgstr "Phi-3-Vision/Phi-3.5-Vision" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Gemma3" +msgstr "Gemma3" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Llama3.2" +msgstr "Llama3.2" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "PaddleOCR-VL" +msgstr "PaddleOCR-VL" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Llama4" +msgstr 
"Llama4" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[1972](https://github.com/vllm-project/vllm-ascend/issues/1972)" +msgstr "[1972](https://github.com/vllm-project/vllm-ascend/issues/1972)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Keye-VL-8B-Preview" +msgstr "Keye-VL-8B-Preview" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[1963](https://github.com/vllm-project/vllm-ascend/issues/1963)" +msgstr "[1963](https://github.com/vllm-project/vllm-ascend/issues/1963)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Florence-2" +msgstr "Florence-2" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[2259](https://github.com/vllm-project/vllm-ascend/issues/2259)" +msgstr "[2259](https://github.com/vllm-project/vllm-ascend/issues/2259)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "GLM-4V" +msgstr "GLM-4V" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[2260](https://github.com/vllm-project/vllm-ascend/issues/2260)" +msgstr "[2260](https://github.com/vllm-project/vllm-ascend/issues/2260)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "InternVL2.0/2.5/3.0
InternVideo2.5/Mono-InternVL" +msgstr "InternVL2.0/2.5/3.0
InternVideo2.5/Mono-InternVL" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[2064](https://github.com/vllm-project/vllm-ascend/issues/2064)" +msgstr "[2064](https://github.com/vllm-project/vllm-ascend/issues/2064)" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "Whisper" +msgstr "Whisper" + +#: ../../source/user_guide/support_matrix/supported_models.md +msgid "[2262](https://github.com/vllm-project/vllm-ascend/issues/2262)" +msgstr "[2262](https://github.com/vllm-project/vllm-ascend/issues/2262)" + +#: ../../source/user_guide/support_matrix/supported_models.md msgid "Ultravox" msgstr "Ultravox" + +#~ msgid "Model Support" +#~ msgstr "模型支持" + +#~ msgid "ChatGLM" +#~ msgstr "ChatGLM"