upgrade main to 0212 (#6712)
### What this PR does / why we need it?
Fixes `transformers_utils/processors/__init__` import error, due to
https://github.com/vllm-project/vllm/pull/33247
Fixes the Fused MoE breakage introduced by the `MoERunner` abstraction, due to
https://github.com/vllm-project/vllm/pull/32344
> Delete AscendMoERunner when
https://github.com/vllm-project/vllm/pull/35178 is merged
Fixes the breakage caused by the upstream change `Make Qwen3VL compatible with Transformers v5`, due to
https://github.com/vllm-project/vllm/pull/34262
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main:
9562912cea
---------
Signed-off-by: wxsIcey <1790571317@qq.com>
This commit is contained in:
@@ -525,6 +525,13 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
|
||||
"increase the number of supported shapes, set HCCL_OP_EXPANSION_MODE=AIV."
|
||||
)
|
||||
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if vllm_version_is("0.15.0"):
|
||||
arch_name = vllm_config.model_config.architectures[0]
|
||||
else:
|
||||
arch_name = vllm_config.model_config.architecture
|
||||
|
||||
# If original sizes exceed maximum, sample a representative subset
|
||||
if max_num_batch_sizes < len(original_sizes):
|
||||
# Sample uniformly from original sizes
|
||||
@@ -536,10 +543,9 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
|
||||
|
||||
sampled_sizes = [original_sizes[i] for i in indices]
|
||||
update_cudagraph_capture_sizes(vllm_config, sampled_sizes)
|
||||
|
||||
logger.info(
|
||||
"Adjusted ACL graph batch sizes for %s model (layers: %d): %d → %d sizes",
|
||||
vllm_config.model_config.architectures[0],
|
||||
arch_name,
|
||||
num_hidden_layers,
|
||||
len(original_sizes),
|
||||
len(
|
||||
@@ -551,7 +557,7 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
|
||||
compilation_config.cudagraph_capture_sizes = original_sizes
|
||||
logger.info(
|
||||
"No adjustment needed for ACL graph batch sizes: %s model (layers: %d) with %d sizes",
|
||||
vllm_config.model_config.architectures[0],
|
||||
arch_name,
|
||||
num_hidden_layers,
|
||||
len(original_sizes),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user