[Aclgraph][DP] Fix dp dummy run not in aclgraph error (#3208)
### What this PR does / why we need it?
When running DP in a non-equilibrium scenario, i.e. some DP groups are executing `dummy_run` while others are serving real requests, we need to make sure the dummy run uses the same mode (aclgraph) as the other DP groups, thus improving performance in the DP scenario.

### How was this patch tested?
Tested by adding logs in `_dummy_run`.

- vLLM version: v0.10.2
- vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.0

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
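For context, here is a minimal sketch of the worker-side call pattern that the updated unit test below asserts. The `NPUWorkerSketch` class, its constructor, and the `__main__` harness are illustrative assumptions, not the actual `vllm_ascend` implementation; only the `_dummy_run(num_tokens=1, uniform_decode=True, force_attention=False)` call shape is taken from the test change.

```python
from unittest.mock import MagicMock


class NPUWorkerSketch:
    """Hypothetical stand-in for the NPUWorker dummy-batch path (sketch only)."""

    def __init__(self, model_runner, compilation_config):
        self.model_runner = model_runner
        # compilation_config is assumed to carry the cudagraph/aclgraph mode.
        self.compilation_config = compilation_config

    def execute_dummy_batch(self) -> None:
        # Run the dummy batch as a single-token uniform decode so an idle DP
        # rank follows the same graph-captured path as ranks serving real
        # decode requests, instead of falling back to eager execution.
        self.model_runner._dummy_run(num_tokens=1,
                                     uniform_decode=True,
                                     force_attention=False)


if __name__ == "__main__":
    runner = MagicMock()
    worker = NPUWorkerSketch(runner, compilation_config=MagicMock())
    worker.execute_dummy_batch()
    runner._dummy_run.assert_called_once_with(num_tokens=1,
                                              uniform_decode=True,
                                              force_attention=False)
```

Dispatching the dummy batch as a uniform decode keeps idle DP ranks on the same captured-graph path as the ranks doing real decoding, which is the mode mismatch this PR fixes.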
@@ -444,6 +444,8 @@ class TestNPUWorker(TestBase):
         # Create worker mock
         with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
             worker = NPUWorker()
+            worker.compilation_config = MagicMock()
+            worker.compilation_config.cudagraph_mode = MagicMock()
             mock_model_runner = MagicMock()
             worker.model_runner = mock_model_runner
 
@@ -451,7 +453,8 @@ class TestNPUWorker(TestBase):
             worker.execute_dummy_batch()
 
             # Verify call
-            mock_model_runner._dummy_run.assert_called_once_with(1)
+            mock_model_runner._dummy_run.assert_called_once_with(
+                num_tokens=1, uniform_decode=True, force_attention=False)
 
     @patch("vllm_ascend.worker.worker_v1.envs_vllm")
     @patch("vllm_ascend.worker.worker_v1.logger")