Files
xc-llm-ascend/tests/e2e/multi_node/config/config.json
Li Wang 1b1207e3c3 [Bugfix] Add quantization param for multi-node CI (#3383)
### What this PR does / why we need it?
Add the `quantization` parameter to the leader and worker server configs of the `deepseek-w8a8` multi-node test.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: wangli <wangli858794774@gmail.com>
2025-10-11 19:25:16 +08:00

44 lines
1.4 KiB
JSON

[
{
"test_name": "test_deepseek_v3",
"disaggregate_prefill": false,
"enable_multithread_load": false,
"num_nodes": 2,
"server_parameters": {
"leader_config": {
"model": "vllm-ascend/DeepSeek-V3-W8A8",
"quantization": "ascend",
"additional_config": {
"ascend_scheduler_config": {
"enabled": true
},
"torchair_graph_config": {
"enabled": true
}
}
},
"worker_config": {
"model": "vllm-ascend/DeepSeek-V3-W8A8",
"quantization": "ascend",
"additional_config": {
"ascend_scheduler_config": {
"enabled": true
},
"torchair_graph_config": {
"enabled": true
}
}
}
},
"client_parameters": {
"model": "vllm-ascend/DeepSeek-V3-W8A8",
"backend": "vllm",
"dataset_name": "sharegpt",
"dataset_path": "/root/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
"num_prompts": 200,
"request_rate": 1
},
"accuracy_parameters": {}
}
]