Upgrade to vllm 0.17.0 corex v4.1 overlay
This commit is contained in:
@@ -300,14 +300,26 @@ class Base(
|
||||
for child_name, child_module in module.named_children():
|
||||
new_module = child_module
|
||||
qual_name = maybe_prefix(prefix, child_name)
|
||||
# Populate Eagle3 attrs
|
||||
if (
|
||||
isinstance(module, nn.ModuleList)
|
||||
and len(module) == self.text_config.num_hidden_layers
|
||||
):
|
||||
# Populate Eagle3 attrs
|
||||
self._target_class = type(child_module)
|
||||
layer_name = qual_name.removeprefix("model.")
|
||||
self._layer_names[int(child_name)] = layer_name
|
||||
# MTP weights should not be loaded into the base model
|
||||
num_hidden_layers = self.text_config.num_hidden_layers
|
||||
names = (
|
||||
"n_predict", # Override from SpeculativeConfig
|
||||
"num_nextn_predict_layers", # Most models
|
||||
"mtp_num_hidden_layers", # Qwen 3.5
|
||||
)
|
||||
n_predict = getattr_iter(self.text_config, names, 0)
|
||||
for i in range(num_hidden_layers, num_hidden_layers + n_predict):
|
||||
mtp_prefix = f"{prefix}.{i}."
|
||||
if mtp_prefix not in self.ignore_unexpected_prefixes:
|
||||
self.ignore_unexpected_prefixes.append(mtp_prefix)
|
||||
# Replace modules as needed
|
||||
if isinstance(child_module, nn.Linear):
|
||||
generator = (p for p in tp_plan if re.match(p, qual_name))
|
||||
|
||||
@@ -218,7 +218,7 @@ class MultiModalProcessor(BaseMultiModalProcessor[MultiModalProcessingInfo]):
|
||||
if "mm_token_type_ids" in processed_data
|
||||
else "token_type_ids"
|
||||
)
|
||||
mm_token_type_ids = processed_data.pop(token_type_key)
|
||||
mm_token_type_ids = processed_data.get(token_type_key)
|
||||
|
||||
# We can infer vLLM style placeholder from token type ids, if we split
|
||||
# it for each input `mm_data`.
|
||||
@@ -353,6 +353,7 @@ class MultiModalMixin(SupportsMultiModal, SupportsMRoPE):
|
||||
|
||||
num_image_patches = kwargs.pop("num_image_patches")
|
||||
kwargs.pop("token_type_ids", None) # used only in `forward`
|
||||
kwargs.pop("mm_token_type_ids", None) # used only in `model.get_rope_index`
|
||||
|
||||
if pixel_values is not None:
|
||||
# ROCm: Force math SDP backend for vision encoder to avoid accuracy issues
|
||||
@@ -443,6 +444,7 @@ class MultiModalMixin(SupportsMultiModal, SupportsMRoPE):
|
||||
{
|
||||
"image_grid_thw",
|
||||
"video_grid_thw",
|
||||
"mm_token_type_ids",
|
||||
"second_per_grid_ts",
|
||||
"audio_feature_lengths",
|
||||
"use_audio_in_video",
|
||||
@@ -451,7 +453,7 @@ class MultiModalMixin(SupportsMultiModal, SupportsMRoPE):
|
||||
if any(
|
||||
v
|
||||
for k, v in kwargs.items()
|
||||
if k not in {"image_grid_thw", "video_grid_thw"}
|
||||
if k not in {"image_grid_thw", "mm_token_type_ids"}
|
||||
):
|
||||
raise NotImplementedError(
|
||||
"Transformers modeling backend only supports images."
|
||||
@@ -459,6 +461,7 @@ class MultiModalMixin(SupportsMultiModal, SupportsMRoPE):
|
||||
|
||||
image_grid_thw = kwargs.get("image_grid_thw", [])
|
||||
video_grid_thw = kwargs.get("video_grid_thw", [])
|
||||
mm_token_type_ids = kwargs.get("mm_token_type_ids")
|
||||
|
||||
image_grid_thw = (torch.stack if image_grid_thw else torch.tensor)(
|
||||
image_grid_thw
|
||||
@@ -467,10 +470,29 @@ class MultiModalMixin(SupportsMultiModal, SupportsMRoPE):
|
||||
video_grid_thw
|
||||
)
|
||||
|
||||
# In v4 `get_rope_index` doesn't have wildcard `kwargs`, and
|
||||
# can't accept arbitrary args, even if their value is `None`
|
||||
kwargs = {}
|
||||
if mm_token_type_ids:
|
||||
if not hasattr(self, "_get_rope_index_accepts_mm_token_type_ids"):
|
||||
import inspect
|
||||
|
||||
sig = inspect.signature(self.model.get_rope_index)
|
||||
params = sig.parameters
|
||||
self._get_rope_index_accepts_mm_token_type_ids = (
|
||||
"mm_token_type_ids" in params
|
||||
or any(
|
||||
p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values()
|
||||
)
|
||||
)
|
||||
if self._get_rope_index_accepts_mm_token_type_ids:
|
||||
kwargs["mm_token_type_ids"] = torch.cat(mm_token_type_ids)
|
||||
|
||||
mrope_positions, mrope_position_delta = self.model.get_rope_index(
|
||||
input_ids=torch.tensor(input_tokens).unsqueeze(0),
|
||||
image_grid_thw=image_grid_thw,
|
||||
video_grid_thw=video_grid_thw,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
mrope_positions = mrope_positions[:, 0]
|
||||
|
||||
Reference in New Issue
Block a user