[CI] Fix OOM of deepseek-eplb nightly test. (#3884)

### What this PR does / why we need it?
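Fix the out-of-memory (OOM) failure of the deepseek-eplb nightly test by dropping the `"init_redundancy_expert": 16` entry from the `--additional-config` passed in the affected tests.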

- vLLM version: v0.11.0rc3
- vLLM main: 83f478bb19

---------

Signed-off-by: offline0806 <3337230449@qq.com>
Co-authored-by: offline0806 <3337230449@qq.com>
This commit is contained in:
offline893
2025-10-30 10:18:07 +08:00
committed by GitHub
parent dc960e798e
commit 14ca1e5cb2
4 changed files with 4 additions and 4 deletions

View File

@@ -85,7 +85,7 @@ async def test_models(model: str, tp_size: int, dp_size: int) -> None:
"--quantization", "ascend", "--gpu-memory-utilization", "0.9",
"--additional-config", '{"enable_weight_nz_layout":true, '
'"torch_air_graph_config":{"enabled": true, "enable_multistream_mla": true, "graph_batch_size": [16], "use_cached_graph": true},'
'"dynamic_eplb": true, "num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200, "init_redundancy_expert": 16}'
'"dynamic_eplb": true, "num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200'
]
request_keyword_args: dict[str, Any] = {
**api_keyword_args,

View File

@@ -82,8 +82,7 @@ async def test_models(model: str, tp_size: int) -> None:
"--quantization", "ascend", "--gpu-memory-utilization", "0.9",
"--additional-config",
'{"enable_weight_nz_layout":true, "dynamic_eplb": true, '
'"num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200, '
'"init_redundancy_expert": 16}'
'"num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200}'
]
request_keyword_args: dict[str, Any] = {
**api_keyword_args,