From be740acdb0ad339d31341dbba313fa16acb91aa5 Mon Sep 17 00:00:00 2001
From: Vincent Zhong <207368749+vincentzed@users.noreply.github.com>
Date: Sun, 12 Oct 2025 08:25:30 -0400
Subject: [PATCH] [smol] [perf] Qwen3-VL in place op. (#11481)

Signed-off-by: vincentzed <207368749+vincentzed@users.noreply.github.com>
---
 python/sglang/srt/models/qwen3_vl.py     | 11 ++++-------
 python/sglang/srt/models/qwen3_vl_moe.py |  7 +++----
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/python/sglang/srt/models/qwen3_vl.py b/python/sglang/srt/models/qwen3_vl.py
index 8649807a5..0f8995307 100644
--- a/python/sglang/srt/models/qwen3_vl.py
+++ b/python/sglang/srt/models/qwen3_vl.py
@@ -189,10 +189,10 @@ class Qwen3_VisionBlock(nn.Module):
             position_embeddings=position_embeddings,
         )
         attn = rearrange(attn, "b s ... -> s b ...")
-        x = x + attn
+        x += attn
         norm2 = self.norm2(x)
         mlp = self.mlp(norm2)
-        x = x + mlp
+        x += mlp
         return x
 
 
@@ -441,7 +441,7 @@ class Qwen3_VisionTransformer(nn.Module):
         x = self.patch_embed(x)
 
         pos_embeds = self.fast_pos_embed_interpolate(grid_thw)
-        x = x + pos_embeds
+        x += pos_embeds
         rotary_pos_emb = self.rot_pos_emb(grid_thw)
 
         seq_len, _ = x.size()
@@ -574,10 +574,7 @@ class Qwen3LLMModel(Qwen3Model):
                 and layer_idx in self.deepstack_embed_to_decoder_layer
             ):
                 sep = self.hidden_size * layer_idx
-                hidden_states = (
-                    hidden_states
-                    + input_deepstack_embeds[:, sep : sep + self.hidden_size]
-                )
+                hidden_states += input_deepstack_embeds[:, sep : sep + self.hidden_size]
 
         if not self.pp_group.is_last_rank:
             return PPProxyTensors(
diff --git a/python/sglang/srt/models/qwen3_vl_moe.py b/python/sglang/srt/models/qwen3_vl_moe.py
index 08c1d1758..125114749 100644
--- a/python/sglang/srt/models/qwen3_vl_moe.py
+++ b/python/sglang/srt/models/qwen3_vl_moe.py
@@ -114,7 +114,7 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
         for layer_idx, layer in enumerate(
             self.layers[self.start_layer : self.end_layer]
         ):
-            layer_idx = layer_idx + self.start_layer
+            layer_idx += self.start_layer
             if layer_idx in self.layers_to_capture:
                 aux_hidden_states.append(
                     hidden_states + residual if residual is not None else hidden_states
@@ -130,9 +130,8 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
             # process deepstack
             if input_deepstack_embeds is not None and layer_idx in range(3):
                 sep = self.hidden_size * layer_idx
-                hidden_states = (
-                    hidden_states
-                    + input_deepstack_embeds[:, sep : sep + self.hidden_size]
+                hidden_states.add_(
+                    input_deepstack_embeds[:, sep : sep + self.hidden_size]
                 )
 
         if not self.pp_group.is_last_rank: