From 052cc4e61bd7625d47f6e93ff03a272c0520be86 Mon Sep 17 00:00:00 2001
From: Canlin Guo <961750412@qq.com>
Date: Thu, 12 Feb 2026 08:55:48 +0800
Subject: [PATCH] [Docs] Fix GLM-5 deploy command (#6711)

This pull request refines the GLM-5 deployment documentation by updating the Docker run command to include a more comprehensive set of device mappings and by removing an extraneous quantization flag from the `vllm serve` commands. These changes aim to correct and clarify the deployment instructions, ensuring users can successfully set up and run the GLM-5 model as intended.

- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007

Signed-off-by: Canlin Guo <961750412@qq.com>
---
 docs/source/tutorials/models/GLM5.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/source/tutorials/models/GLM5.md b/docs/source/tutorials/models/GLM5.md
index 13a27995..ddfff28e 100644
--- a/docs/source/tutorials/models/GLM5.md
+++ b/docs/source/tutorials/models/GLM5.md
@@ -48,6 +48,14 @@ docker run --rm \
 --device /dev/davinci5 \
 --device /dev/davinci6 \
 --device /dev/davinci7 \
+--device /dev/davinci8 \
+--device /dev/davinci9 \
+--device /dev/davinci10 \
+--device /dev/davinci11 \
+--device /dev/davinci12 \
+--device /dev/davinci13 \
+--device /dev/davinci14 \
+--device /dev/davinci15 \
 --device /dev/davinci_manager \
 --device /dev/devmm_svm \
 --device /dev/hisi_hdc \
@@ -181,7 +189,6 @@ vllm serve /root/.cache/modelscope/hub/models/vllm-ascend/GLM5-bf16 \
 --data-parallel-address $node0_ip \
 --data-parallel-rpc-port 12890 \
 --tensor-parallel-size 16 \
---quantization ascend \
 --seed 1024 \
 --served-model-name glm-5 \
 --enable-expert-parallel \
@@ -228,7 +235,6 @@ vllm serve /root/.cache/modelscope/hub/models/vllm-ascend/GLM5-bf16 \
 --data-parallel-address $node0_ip \
 --data-parallel-rpc-port 12890 \
 --tensor-parallel-size 16 \
---quantization ascend \
 --seed 1024 \
 --served-model-name glm-5 \
 --enable-expert-parallel \