[Fix] Add extra warmup run count for MC2 on specific SoC version (#4843)
### What this PR does / why we need it? The expected warmup run count in this test did not account for the extra MC2 warmup run, because A3 was not previously available in CI. Now that A3 is in CI, the test adds one extra warmup run for DeepSeek models on that SoC version — please take a look at `profile_run`. Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com> Co-authored-by: Mengqing Cao <cmq0113@163.com>
This commit is contained in:
@@ -25,6 +25,8 @@ import pytest
|
|||||||
import torch
|
import torch
|
||||||
from vllm.utils.network_utils import get_open_port
|
from vllm.utils.network_utils import get_open_port
|
||||||
|
|
||||||
|
from vllm_ascend.utils import AscendDeviceType, get_ascend_device_type
|
||||||
|
|
||||||
MODELS = [
|
MODELS = [
|
||||||
"Qwen/Qwen3-0.6B",
|
"Qwen/Qwen3-0.6B",
|
||||||
"vllm-ascend/DeepSeek-V2-Lite-W8A8",
|
"vllm-ascend/DeepSeek-V2-Lite-W8A8",
|
||||||
@@ -212,6 +214,10 @@ def test_aclgraph_capture_replay_dp2(
|
|||||||
|
|
||||||
# Part A: Warmup runs (Profile run + 2 runs per captured graph)
|
# Part A: Warmup runs (Profile run + 2 runs per captured graph)
|
||||||
warmup_runs = 1 + (2 * max_batch_sizes)
|
warmup_runs = 1 + (2 * max_batch_sizes)
|
||||||
|
soc_version = get_ascend_device_type()
|
||||||
|
if soc_version in {AscendDeviceType._910_93} and "DeepSeek" in model:
|
||||||
|
# An extra warmup run is needed for MC2 warmup here
|
||||||
|
warmup_runs += 1
|
||||||
|
|
||||||
# Part B: Alignment padding (Empty runs to hit the 32-step boundary)
|
# Part B: Alignment padding (Empty runs to hit the 32-step boundary)
|
||||||
padding_runs = aligned_steps - total_steps
|
padding_runs = aligned_steps - total_steps
|
||||||
|
|||||||
Reference in New Issue
Block a user