Upgrade to vllm 0.17.0 corex v4.1 overlay
This commit is contained in:
@@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Qwen3-VL-MoE model compatible with HuggingFace weights."""
|
||||
|
||||
+from platform import architecture
|
||||
import typing
|
||||
from collections.abc import Callable, Iterable
|
||||
from itertools import islice
|
||||
@@ -45,6 +46,7 @@ from vllm.model_executor.model_loader.weight_utils import (
|
||||
)
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.sequence import IntermediateTensors
|
||||
+from vllm.tokenizers.registry import cached_tokenizer_from_config
|
||||
|
||||
from .interfaces import MixtureOfExperts
|
||||
from .qwen3_moe import (
|
||||
@@ -415,6 +417,7 @@ class Qwen3VLMoeForConditionalGeneration(
|
||||
multimodal_config = vllm_config.model_config.multimodal_config
|
||||
|
||||
self.config = config
|
||||
+self._tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
|
||||
self.multimodal_config = multimodal_config
|
||||
self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
|
||||
self.video_pruning_rate = multimodal_config.video_pruning_rate
|
||||
@@ -451,14 +454,14 @@ class Qwen3VLMoeForConditionalGeneration(
|
||||
|
||||
with self._mark_language_model(vllm_config):
|
||||
self.language_model = Qwen3MoeLLMForCausalLM(
|
||||
-vllm_config=vllm_config.with_hf_config(config.text_config),
|
||||
+vllm_config=vllm_config.with_hf_config(config.text_config, architectures=["Qwen3MoeForCausalLM"]),
|
||||
prefix=maybe_prefix(prefix, "language_model"),
|
||||
)
|
||||
|
||||
if not get_pp_group().is_first_rank and hasattr(
|
||||
config.vision_config, "deepstack_visual_indexes"
|
||||
):
|
||||
-assert self.language_model.start_layer >= len(
|
||||
+assert self.language_model.model.start_layer >= len(
|
||||
config.vision_config.deepstack_visual_indexes
|
||||
), (
|
||||
"start_layer should be greater than or equal to "
|
||||
|
||||
Reference in New Issue
Block a user