Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -195,6 +195,8 @@ class Qwen2_5_VLVideoPixelInputs(TensorSchema):
- second_per_grid_ts: The video time interval (in seconds) for each
grid along the temporal dimension in the 3D position IDs. Returned
when `videos` is not `None`.
- timestamps: List of timestamp values (in seconds) for each frame
after merging. Length equals the temporal dimension after merging.
"""
type: Literal["pixel_values_videos"]
@@ -214,6 +216,8 @@ class Qwen2_5_VLVideoPixelInputs(TensorSchema):
TensorShape("nv"),
]
timestamps: list[list[float]] | None = None
class Qwen2_5_VLVideoEmbeddingInputs(TensorSchema):
"""
@@ -232,6 +236,8 @@ class Qwen2_5_VLVideoEmbeddingInputs(TensorSchema):
- second_per_grid_ts: The video time interval (in seconds) for each
grid along the temporal dimension in the 3D position IDs. Returned
when `videos` is not `None`.
- timestamps: List of timestamp values (in seconds) for each frame
after merging. Length equals the temporal dimension after merging.
"""
type: Literal["video_embeds"]
@@ -250,6 +256,7 @@ class Qwen2_5_VLVideoEmbeddingInputs(TensorSchema):
torch.Tensor | None,
TensorShape("nv"),
] = None
timestamps: list[list[float]] | None = None
Qwen2_5_VLVideoInputs: TypeAlias = (
@@ -289,10 +296,11 @@ class Qwen2_5_VisionMLP(nn.Module):
disable_tp=use_data_parallel,
)
self.act_fn = act_fn
self.hidden_features = hidden_features
def forward(self, x: torch.Tensor):
    """Run the input through the projection / activation / projection stack.

    The input goes through ``gate_up_proj``, then ``act_fn``, then
    ``down_proj``. Both projection layers return a pair; only the first
    element is used here and the second is discarded.

    Args:
        x: Input tensor handed to ``gate_up_proj``.

    Returns:
        The first element of the pair returned by ``down_proj``.
    """
    gate_up, _ = self.gate_up_proj(x)
    # Apply the activation exactly once, using the two-argument form.
    # The stale single-argument call ``self.act_fn(gate_up)`` that preceded
    # this line had its result immediately overwritten, so it was at best a
    # wasted pass over the tensor.
    # NOTE(review): presumably ``hidden_features`` tells the activation
    # where to split the fused gate/up tensor -- confirm against ``act_fn``.
    x = self.act_fn(gate_up, self.hidden_features)
    x_down, _ = self.down_proj(x)
    return x_down
@@ -357,6 +365,7 @@ class Qwen2_5_VisionAttention(nn.Module):
rotary_pos_emb_cos: torch.Tensor,
rotary_pos_emb_sin: torch.Tensor,
max_seqlen: torch.Tensor, # Only used for Flash Attention
sequence_lengths: torch.Tensor, # Only used for FlashInfer CuDNN backend
) -> torch.Tensor:
# [s, b, c] --> [s, b, head * 3 * head_dim]
x, _ = self.qkv(x)
@@ -398,6 +407,7 @@ class Qwen2_5_VisionAttention(nn.Module):
value=v,
cu_seqlens=cu_seqlens,
max_seqlen=max_seqlen,
sequence_lengths=sequence_lengths,
)
context_layer = einops.rearrange(
@@ -463,6 +473,7 @@ class Qwen2_5_VisionBlock(nn.Module):
rotary_pos_emb_cos=rotary_pos_emb_cos,
rotary_pos_emb_sin=rotary_pos_emb_sin,
max_seqlen=max_seqlen,
sequence_lengths=None,
)
x_fused_norm, residual = self.norm2(x, residual=x_attn)
x = residual + self.mlp(x_fused_norm)