diff --git a/.github/workflows/_e2e_nightly_multi_node.yaml b/.github/workflows/_e2e_nightly_multi_node.yaml
index ae3bb6be..5f14620a 100644
--- a/.github/workflows/_e2e_nightly_multi_node.yaml
+++ b/.github/workflows/_e2e_nightly_multi_node.yaml
@@ -216,7 +216,7 @@ jobs:
           # 1) check follower pods
           ALL_FOLLOWERS_READY=true
-          for ((i=1; i<${SIZE}; i++)); do
+          for ((i=1; i<SIZE; i++)); do
            PHASE=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
            READY=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null)
diff --git a/docs/source/tutorials/multi_node.md b/docs/source/tutorials/multi_node.md
index 6738ee4f..68c7056b 100644
--- a/docs/source/tutorials/multi_node.md
+++ b/docs/source/tutorials/multi_node.md
@@ -131,9 +131,9 @@ vllm serve vllm-ascend/DeepSeek-V3.1-W8A8 \
 --served-model-name deepseek_v3.1 \
 --enable-expert-parallel \
 --max-num-seqs 16 \
---max-model-len 32768 \
+--max-model-len 8192 \
 --quantization ascend \
---max-num-batched-tokens 4096 \
+--max-num-batched-tokens 8192 \
 --trust-remote-code \
 --no-enable-prefix-caching \
 --gpu-memory-utilization 0.9 \
@@ -176,8 +176,8 @@ vllm serve vllm-ascend/DeepSeek-V3.1-W8A8 \
 --quantization ascend \
 --served-model-name deepseek_v3.1 \
 --max-num-seqs 16 \
---max-model-len 32768 \
---max-num-batched-tokens 4096 \
+--max-model-len 8192 \
+--max-num-batched-tokens 8192 \
 --enable-expert-parallel \
 --trust-remote-code \
 --no-enable-prefix-caching \
diff --git a/docs/source/tutorials/multi_node_kimi.md b/docs/source/tutorials/multi_node_kimi.md
index 1a5fe2eb..59bfa8f6 100644
--- a/docs/source/tutorials/multi_node_kimi.md
+++ b/docs/source/tutorials/multi_node_kimi.md
@@ -88,8 +88,8 @@ vllm serve /home/cache/weights/Kimi-K2-Instruct-W8A8 \
 --tensor-parallel-size 8 \
 --enable-expert-parallel \
 --max-num-seqs 16 \
---max-model-len 32768 \
---max-num-batched-tokens 4096 \
+--max-model-len 8192 \
+--max-num-batched-tokens 8192 \
 --trust-remote-code \
 --no-enable-prefix-caching \
 --gpu-memory-utilization 0.9 \
@@ -130,9 +130,9 @@ vllm serve /home/cache/weights/Kimi-K2-Instruct-W8A8 \
 --tensor-parallel-size 8 \
 --served-model-name kimi \
 --max-num-seqs 16 \
---max-model-len 32768 \
+--max-model-len 8192 \
 --quantization ascend \
---max-num-batched-tokens 4096 \
+--max-num-batched-tokens 8192 \
 --enable-expert-parallel \
 --trust-remote-code \
 --no-enable-prefix-caching \