[Fix] Fix update_aclgraph_sizes when running MoE models (#913)

### What this PR does / why we need it?
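Fix `update_aclgraph_sizes` when running MoE models: the parallel configuration factor now also takes the expert-parallel settings (`expert_parallel_size`, `expert_tensor_parallel_size`) into account.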

---------

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
This commit is contained in:
yiz-liu
2025-05-30 15:17:11 +08:00
committed by GitHub
parent 3442fbdb23
commit 5a1689fc64
5 changed files with 47 additions and 35 deletions

View File

@@ -126,14 +126,16 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
original_sizes, compilation_config.cudagraph_capture_sizes = \
compilation_config.cudagraph_capture_sizes, None
# Calculate parallel configuration factor (increases with DP or TP)
# TODO(Yizhou): This is a temporary solution, need to be improved
# in the future, taking into account the other parallel configurations.
# Calculate parallel configuration factor
num_hidden_layers = vllm_config.model_config.hf_config.num_hidden_layers
parallel_config = vllm_config.parallel_config
# TODO: Find out whether we need to take into account the pp_size
parallel_factor = 1 + sum(size > 1 for size in [
parallel_config.data_parallel_size,
parallel_config.tensor_parallel_size
parallel_config.data_parallel_size_local,
parallel_config.tensor_parallel_size,
parallel_config.expert_parallel_size,
parallel_config.expert_tensor_parallel_size,
])
# Calculate maximum supported batch sizes considering model architecture