Upgrade to vllm 0.17.0 corex v4.1 overlay

2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -195,6 +195,8 @@ class Qwen2_5_VLVideoPixelInputs(TensorSchema):
        - second_per_grid_ts: The video time interval (in seconds) for each
          grid along the temporal dimension in the 3D position IDs. Returned
          when `videos` is not `None`.
+        - timestamps: Optional nested list of per-frame timestamps (in
+          seconds) after merging; inner length is the merged temporal dim.
    """

    type: Literal["pixel_values_videos"]
@@ -214,6 +216,8 @@ class Qwen2_5_VLVideoPixelInputs(TensorSchema):
        TensorShape("nv"),
    ]

+    timestamps: list[list[float]] | None = None
+

 class Qwen2_5_VLVideoEmbeddingInputs(TensorSchema):
    """
@@ -232,6 +236,8 @@ class Qwen2_5_VLVideoEmbeddingInputs(TensorSchema):
        - second_per_grid_ts: The video time interval (in seconds) for each
          grid along the temporal dimension in the 3D position IDs. Returned
          when `videos` is not `None`.
+        - timestamps: Optional nested list of per-frame timestamps (in
+          seconds) after merging; inner length is the merged temporal dim.
    """

    type: Literal["video_embeds"]
@@ -250,6 +256,7 @@ class Qwen2_5_VLVideoEmbeddingInputs(TensorSchema):
        torch.Tensor | None,
        TensorShape("nv"),
    ] = None
+    timestamps: list[list[float]] | None = None


 Qwen2_5_VLVideoInputs: TypeAlias = (
@@ -289,10 +296,11 @@ class Qwen2_5_VisionMLP(nn.Module):
            disable_tp=use_data_parallel,
        )
        self.act_fn = act_fn
+        self.hidden_features = hidden_features

    def forward(self, x: torch.Tensor):
        gate_up, _ = self.gate_up_proj(x)
-        x = self.act_fn(gate_up)
+        x = self.act_fn(gate_up, self.hidden_features)
        x_down, _ = self.down_proj(x)
        return x_down

@@ -357,6 +365,7 @@ class Qwen2_5_VisionAttention(nn.Module):
        rotary_pos_emb_cos: torch.Tensor,
        rotary_pos_emb_sin: torch.Tensor,
        max_seqlen: torch.Tensor,  # Only used for Flash Attention
+        sequence_lengths: torch.Tensor,  # Only used for FlashInfer CuDNN backend
    ) -> torch.Tensor:
        # [s, b, c] --> [s, b, head * 3 * head_dim]
        x, _ = self.qkv(x)
@@ -398,6 +407,7 @@ class Qwen2_5_VisionAttention(nn.Module):
            value=v,
            cu_seqlens=cu_seqlens,
            max_seqlen=max_seqlen,
+            sequence_lengths=sequence_lengths,
        )

        context_layer = einops.rearrange(
@@ -463,6 +473,7 @@ class Qwen2_5_VisionBlock(nn.Module):
            rotary_pos_emb_cos=rotary_pos_emb_cos,
            rotary_pos_emb_sin=rotary_pos_emb_sin,
            max_seqlen=max_seqlen,
+            sequence_lengths=None,
        )
        x_fused_norm, residual = self.norm2(x, residual=x_attn)
        x = residual + self.mlp(x_fused_norm)