From 24a8cee66d9f99e7077bb6d9b74936e8641922cb Mon Sep 17 00:00:00 2001 From: Binyao Jiang Date: Mon, 25 Aug 2025 13:46:28 -0700 Subject: [PATCH] Fix GLM45v launch server cuda torch compile bug (#9554) --- python/sglang/srt/models/qwen2_5_vl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/models/qwen2_5_vl.py b/python/sglang/srt/models/qwen2_5_vl.py index 48270ee21..59f3e6370 100644 --- a/python/sglang/srt/models/qwen2_5_vl.py +++ b/python/sglang/srt/models/qwen2_5_vl.py @@ -526,6 +526,7 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module): def get_input_embeddings(self): return self.model.embed_tokens + @torch.no_grad() def forward( self, input_ids: torch.Tensor,