Files
xc-llm-ascend/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2.yaml
jiangyunfan1 e56b0017a3 [TEST]Add aisbench log and A2 cases (#3841)
### What this PR does / why we need it?
This PR adds 2 more A2 cases which we need to test daily. It also
enhances the logging for aisbench test failures to improve issue
identification.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
By running the test

- vLLM version: v0.11.0rc3
- vLLM main:
https://github.com/vllm-project/vllm/commit/releases/v0.11.1

---------

Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
2025-10-28 23:33:15 +08:00

58 lines
1.8 KiB
YAML

# Multi-node test case: DeepSeek-R1 (W8A8 quantized) served with vLLM across
# two nodes, 8 NPUs each.
test_name: "test DeepSeek-R1-W8A8 on A2"
model: "vllm-ascend/DeepSeek-R1-0528-W8A8"
num_nodes: 2
npu_per_node: 8

# Settings shared by every node. SERVER_PORT is referenced as $SERVER_PORT in
# the first server command below.
# NOTE(review): `true` / `false` here are YAML booleans, not strings; if the
# harness exports these as environment variables, the resulting text depends
# on how it stringifies them - confirm against the loader.
env_common:
  VLLM_USE_MODELSCOPE: true
  HCCL_BUFFSIZE: 1024
  SERVER_PORT: 8080
  OMP_PROC_BIND: false
  OMP_NUM_THREADS: 10

# One entry per node (num_nodes: 2). Each `server_cmd` is a folded scalar
# (`>`): its lines are joined with single spaces into one command line, so no
# comments may be placed inside the scalar body.
# Both commands use data-parallel size 4 with 2 local ranks per node and
# tensor-parallel size 4 (2 x 4 = 8, presumably matching npu_per_node).
deployment:
  # First entry: exposes the HTTP endpoint on $SERVER_PORT and uses $LOCAL_IP
  # as the data-parallel address.
  - server_cmd: >
      vllm serve vllm-ascend/DeepSeek-R1-0528-W8A8
      --host 0.0.0.0
      --port $SERVER_PORT
      --data-parallel-size 4
      --data-parallel-size-local 2
      --data-parallel-address $LOCAL_IP
      --data-parallel-rpc-port 13399
      --no-enable-prefix-caching
      --max-num-seqs 16
      --tensor-parallel-size 4
      --max-model-len 36864
      --max-num-batched-tokens 6000
      --enable-expert-parallel
      --trust-remote-code
      --quantization ascend
      --gpu-memory-utilization 0.9
      --enforce-eager
      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
      --additional-config '{"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
  # Second entry: runs --headless (no --host/--port), starts at data-parallel
  # rank 2 and points at $MASTER_IP using the same RPC port (13399) as the
  # first entry.
  - server_cmd: >
      vllm serve vllm-ascend/DeepSeek-R1-0528-W8A8
      --headless
      --data-parallel-size 4
      --data-parallel-rpc-port 13399
      --data-parallel-size-local 2
      --data-parallel-start-rank 2
      --data-parallel-address $MASTER_IP
      --no-enable-prefix-caching
      --max-num-seqs 16
      --tensor-parallel-size 4
      --max-model-len 36864
      --max-num-batched-tokens 6000
      --enable-expert-parallel
      --trust-remote-code
      --quantization ascend
      --gpu-memory-utilization 0.9
      --enforce-eager
      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
      --additional-config '{"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
benchmarks: