[CI] Upgrade vllm to newest commit (#3182)
### What this PR does / why we need it? Upgrade vLLM to newest commit - Fix the aclgraph doesn't work problem, caused by24fab45d96- Fix PoolerOutput import error, caused by755ed7b05b- Fix the aclgraph weight load error to keep the same with torchair fix.4492e3a554### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? All test should pass - vLLM version: v0.10.2 - vLLM main:52d0cb8458--------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -33,7 +33,6 @@ from vllm.model_executor.layers.quantization.base_config import (
|
||||
from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
|
||||
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
||||
UnquantizedEmbeddingMethod, VocabParallelEmbedding)
|
||||
from vllm.model_executor.parameter import PerTensorScaleParameter
|
||||
from vllm.model_executor.utils import set_weight_attrs
|
||||
|
||||
from vllm_ascend.distributed.parallel_state import (get_mlp_tp_group,
|
||||
@@ -251,7 +250,6 @@ class AscendLinearMethod(LinearMethodBase):
|
||||
**extra_weight_attrs,
|
||||
) -> None:
|
||||
output_size_per_partition = sum(output_partition_sizes)
|
||||
weight_loader = extra_weight_attrs.get("weight_loader")
|
||||
|
||||
weight_dict = self.quant_method.get_weight(input_size_per_partition,
|
||||
output_size_per_partition,
|
||||
@@ -264,8 +262,7 @@ class AscendLinearMethod(LinearMethodBase):
|
||||
|
||||
pertensor_dict = self.quant_method.get_pertensor_param(params_dtype)
|
||||
for pertensor_name, pertensor_param in pertensor_dict.items():
|
||||
param = PerTensorScaleParameter(data=pertensor_param,
|
||||
weight_loader=weight_loader)
|
||||
param = torch.nn.Parameter(pertensor_param, requires_grad=False)
|
||||
# disable warning
|
||||
param.ignore_warning = True
|
||||
layer.register_parameter(pertensor_name, param)
|
||||
|
||||
Reference in New Issue
Block a user