Upgrade to 0.11.1 newest vllm commit (#3762)
### What this PR does / why we need it? Fix ```spec decode rejection sampler```, caused by https://github.com/vllm-project/vllm/pull/26060. Fix some ```import``` statements, caused by https://github.com/vllm-project/vllm/pull/27374. Fix ```scheduler_config.send_delta_data```, caused by https://github.com/vllm-project/vllm-ascend/pull/3719. Fix ```init_with_cudagraph_sizes```, caused by https://github.com/vllm-project/vllm/pull/26016. Fix ```vl model``` by replacing PatchEmbed's conv3d with a linear layer, caused by https://github.com/vllm-project/vllm/pull/27418. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? CI passed with newly added and existing tests. - vLLM version: v0.11.0rc3 - vLLM main: c9461e05a4 --------- Signed-off-by: Icey <1790571317@qq.com>
This commit is contained in:
@@ -42,7 +42,11 @@ from vllm.model_executor.models.qwen2_5_vl import (
|
||||
from vllm.model_executor.models.utils import maybe_prefix
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
|
||||
from vllm_ascend.utils import ACL_FORMAT_FRACTAL_ND, is_enable_nz
|
||||
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, is_enable_nz,
|
||||
vllm_version_is)
|
||||
|
||||
if not vllm_version_is("0.11.0"):
|
||||
from vllm.model_executor.models.vision import conv3d_to_linear_weight
|
||||
|
||||
MIN_PAD_SIZE = 64 # min_size to pad weight
|
||||
MAX_PAD_SIZE = 128 # max_size to pad weight
|
||||
@@ -355,6 +359,9 @@ class AscendQwen2_5_VisionTransformer(Qwen2_5_VisionTransformer):
|
||||
params_dict = dict(self.named_parameters(remove_duplicate=False))
|
||||
loaded_params: Set[str] = set()
|
||||
for name, loaded_weight in weights:
|
||||
if not vllm_version_is("0.11.0"):
|
||||
if name.endswith("patch_embed.proj.weight"):
|
||||
loaded_weight = conv3d_to_linear_weight(loaded_weight)
|
||||
for (param_name, weight_name, shard_id) in stacked_params_mapping:
|
||||
if weight_name not in name:
|
||||
continue
|
||||
|
||||
@@ -40,7 +40,11 @@ from vllm.model_executor.models.qwen2_vl import (
|
||||
from vllm.model_executor.models.utils import maybe_prefix
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
|
||||
from vllm_ascend.utils import ACL_FORMAT_FRACTAL_ND, is_enable_nz
|
||||
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, is_enable_nz,
|
||||
vllm_version_is)
|
||||
|
||||
if not vllm_version_is("0.11.0"):
|
||||
from vllm.model_executor.models.vision import conv3d_to_linear_weight
|
||||
|
||||
MIN_PAD_SIZE = 64 # min_size to pad weight
|
||||
MAX_PAD_SIZE = 128 # max_size to pad weight
|
||||
@@ -304,6 +308,10 @@ class AscendQwen2VisionTransformer(Qwen2VisionTransformer):
|
||||
loaded_params: Set[str] = set()
|
||||
|
||||
for name, loaded_weight in weights:
|
||||
if not vllm_version_is("0.11.0"):
|
||||
if name.endswith("patch_embed.proj.weight"):
|
||||
loaded_weight = conv3d_to_linear_weight(loaded_weight)
|
||||
|
||||
for (param_name, weight_name, shard_id) in stacked_params_mapping:
|
||||
if weight_name not in name:
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user