# ==========================================
# ACTUAL TEST CASES
# ==========================================

# Each entry under `test_cases` describes one server launch: the model to
# serve, the environment variables to export, and the command-line arguments
# handed to the server.
test_cases:
  - name: "DeepSeek-R1-W8A8-HBM-single"
    model: "vllm-ascend/DeepSeek-R1-W8A8"
    envs:
      HCCL_BUFFSIZE: "1024"
      # NOTE(review): "DEFAULT_PORT" looks like a placeholder that the test
      # harness replaces with a real port number -- confirm against the loader.
      SERVER_PORT: "DEFAULT_PORT"
    # Flags and their values are separate list items, in argv order.
    # All values are quoted so they stay strings (e.g. "0.92", "8").
    server_cmd:
      - "--quantization"
      - "ascend"
      - "--port"
      # Refers to the SERVER_PORT entry under `envs` above.
      - "$SERVER_PORT"
      - "--data-parallel-size"
      - "8"
      - "--data-parallel-size-local"
      - "8"
      - "--data-parallel-rpc-port"
      - "13389"
      - "--tensor-parallel-size"
      - "2"
      - "--enable-expert-parallel"
      - "--seed"
      - "1024"
      - "--max-num-seqs"
      - "32"
      - "--max-model-len"
      - "6000"
      - "--max-num-batched-tokens"
      - "6000"
      - "--trust-remote-code"
      - "--gpu-memory-utilization"
      - "0.92"
      - "--no-enable-prefix-caching"
      - "--reasoning-parser"
      - "deepseek_r1"
      - "--enforce-eager"
      - "--additional-config"
      # JSON payload passed as a single argument; single-quoted so the inner
      # double quotes need no escaping.
      - '{"ascend_scheduler_config": {"enabled": false}, "torchair_graph_config": {"enabled": false, "enable_multistream_shared_expert": false}}'
    # NOTE(review): no value is visible for `benchmarks`, so it parses as
    # null. Confirm that the consumer accepts null here, or that nothing was
    # cut off after this key.
    benchmarks: