diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py index aec8decf..b7d1bae1 100644 --- a/vllm_ascend/utils.py +++ b/vllm_ascend/utils.py @@ -485,7 +485,10 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None: resources_per_graph = num_hidden_layers + 1 # For suffix decoding, use the suffix path when no draft_model_config is provided. if (spec := vllm_config.speculative_config) and (draft := spec.draft_model_config): - resources_per_graph += draft.hf_config.num_hidden_layers + 1 + # Use get_total_num_hidden_layers() to correctly handle MTP models, + # which store layer count in num_nextn_predict_layers or + # mtp_num_hidden_layers (for Qwen3.5) instead of num_hidden_layers. + resources_per_graph += draft.get_total_num_hidden_layers() + 1 # TODO: Find out whether we need to take into account the pp_size num_comm_groups = sum(