From 0a0dd34e6a685320b4ecceac1646f4e04c6d39d0 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Sun, 20 Apr 2025 17:20:53 +0800 Subject: [PATCH] Fix BumpAllocator error when no input_ids (#5564) --- python/sglang/srt/models/deepseek_nextn.py | 4 +++- python/sglang/srt/models/deepseek_v2.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/models/deepseek_nextn.py b/python/sglang/srt/models/deepseek_nextn.py index b77ad0c9a..01ee187ef 100644 --- a/python/sglang/srt/models/deepseek_nextn.py +++ b/python/sglang/srt/models/deepseek_nextn.py @@ -94,7 +94,9 @@ class DeepseekModelNextN(nn.Module): zero_allocator = BumpAllocator( buffer_size=2, dtype=torch.float32, - device=input_ids.device, + device=( + input_embeds.device if input_embeds is not None else input_ids.device + ), ) if input_embeds is None: diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 625df4642..b3bd49173 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -1374,7 +1374,9 @@ class DeepseekV2Model(nn.Module): # TODO for two-batch-overlap, we need a larger buffer size buffer_size=len(self.layers) * 2, dtype=torch.float32, - device=input_ids.device, + device=( + input_embeds.device if input_embeds is not None else input_ids.device + ), ) if input_embeds is None: