[CI] Upgrade vllm to newest commit (#3182)

### What this PR does / why we need it?
Upgrade vLLM to the newest commit.

- Fix the issue where aclgraph does not work, caused by 24fab45d96
- Fix the PoolerOutput import error, caused by 755ed7b05b
- Fix the aclgraph weight-load error so that weight loading stays consistent with the torchair fix. 4492e3a554 (see the sketch after the diff below)

### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
All tests should pass.


- vLLM version: v0.10.2
- vLLM main: 52d0cb8458

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan
Date: 2025-09-26 06:18:15 +08:00
Committed by: GitHub
Parent: 0794f64a18
Commit: 2930e4a6bd
9 changed files with 49 additions and 53 deletions


@@ -1,9 +1,6 @@
 import torch
 from torch.nn.parameter import Parameter
 from vllm.logger import init_logger
-# yapf: disable
-from vllm.model_executor.parameter import ModelWeightParameter
-# yapf: enable
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.utils import GiB_bytes
@@ -16,27 +13,15 @@ def create_weights(self, layer: torch.nn.Module, input_size_per_partition: int,
                        output_partition_sizes: list[int], input_size: int,
                        output_size: int, params_dtype: torch.dtype,
                        **extra_weight_attrs):
-        from vllm_ascend.ascend_config import get_ascend_config
-        ascend_config = get_ascend_config()
         # This method creates unquantized linear weights.
         # The weights are not quantized, and they are not sharded.
         # The amount of memory allocated for the weights is
         # sum(output_partition_sizes) * input_size_per_partition.
         try:
-            if ascend_config.torchair_graph_config.enabled:
-                weight = Parameter(torch.empty(sum(output_partition_sizes),
-                                               input_size_per_partition,
-                                               dtype=params_dtype),
-                                   requires_grad=False)
-            else:
-                weight_loader = extra_weight_attrs.pop("weight_loader")
-                weight = ModelWeightParameter(data=torch.empty(
-                    sum(output_partition_sizes),
-                    input_size_per_partition,
-                    dtype=params_dtype),
-                                              input_dim=1,
-                                              output_dim=0,
-                                              weight_loader=weight_loader)
+            weight = Parameter(torch.empty(sum(output_partition_sizes),
+                                           input_size_per_partition,
+                                           dtype=params_dtype),
+                               requires_grad=False)
         except torch.cuda.OutOfMemoryError as e:
             logger.error("Failed to create unquantized linear weights: %s", e)
             if torch.cuda.is_available():
@@ -49,8 +34,7 @@ def create_weights(self, layer: torch.nn.Module, input_size_per_partition: int,
                 "Failed to create unquantized linear weights. "
                 "This may be caused by insufficient memory to allocate "
                 "the weight.") from e
-        if ascend_config.torchair_graph_config.enabled:
-            set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0})
+        set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0})
         layer.register_parameter("weight", weight)
         set_weight_attrs(weight, extra_weight_attrs)
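
For reference, after this change the weight-creation path is the same for torchair and aclgraph/eager modes: a plain `Parameter` whose shard dims are attached via `set_weight_attrs`. Below is a minimal sketch reconstructed from the hunks above; the enclosing class name is a hypothetical stand-in, and the out-of-memory handling is omitted.

```python
import torch
from torch.nn.parameter import Parameter
from vllm.model_executor.utils import set_weight_attrs


class AscendUnquantizedLinearMethod:  # hypothetical stand-in for the real class

    def create_weights(self, layer: torch.nn.Module,
                       input_size_per_partition: int,
                       output_partition_sizes: list[int], input_size: int,
                       output_size: int, params_dtype: torch.dtype,
                       **extra_weight_attrs):
        # Single code path: a plain Parameter of shape
        # (sum(output_partition_sizes), input_size_per_partition).
        weight = Parameter(torch.empty(sum(output_partition_sizes),
                                       input_size_per_partition,
                                       dtype=params_dtype),
                           requires_grad=False)
        # Shard dims are attached unconditionally now, instead of only when
        # torchair graph mode is enabled, so weight loading behaves the same
        # way in both graph modes.
        set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0})
        layer.register_parameter("weight", weight)
        set_weight_attrs(weight, extra_weight_attrs)
```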