Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only Qwen3-VL-MoE model compatible with HuggingFace weights."""
from platform import architecture
import typing
from collections.abc import Callable, Iterable
from itertools import islice
@@ -45,6 +46,7 @@ from vllm.model_executor.model_loader.weight_utils import (
)
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.sequence import IntermediateTensors
from vllm.tokenizers.registry import cached_tokenizer_from_config
from .interfaces import MixtureOfExperts
from .qwen3_moe import (
@@ -415,6 +417,7 @@ class Qwen3VLMoeForConditionalGeneration(
multimodal_config = vllm_config.model_config.multimodal_config
self.config = config
self._tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
self.multimodal_config = multimodal_config
self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
self.video_pruning_rate = multimodal_config.video_pruning_rate
@@ -451,14 +454,14 @@ class Qwen3VLMoeForConditionalGeneration(
with self._mark_language_model(vllm_config):
self.language_model = Qwen3MoeLLMForCausalLM(
vllm_config=vllm_config.with_hf_config(config.text_config),
vllm_config=vllm_config.with_hf_config(config.text_config, architectures=["Qwen3MoeForCausalLM"]),
prefix=maybe_prefix(prefix, "language_model"),
)
if not get_pp_group().is_first_rank and hasattr(
config.vision_config, "deepstack_visual_indexes"
):
assert self.language_model.start_layer >= len(
assert self.language_model.model.start_layer >= len(
config.vision_config.deepstack_visual_indexes
), (
"start_layer should be greater than or equal to "