diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py index 95239c2f9..0243fe89e 100644 --- a/python/sglang/srt/model_executor/forward_batch_info.py +++ b/python/sglang/srt/model_executor/forward_batch_info.py @@ -734,9 +734,8 @@ class ForwardBatch: self.encoder_lens = self._pad_tensor_to_size(self.encoder_lens, bs) self.positions = self._pad_tensor_to_size(self.positions, num_tokens) self.global_num_tokens_cpu = global_num_tokens - self.global_num_tokens_gpu = self.global_num_tokens_gpu.new_tensor( - global_num_tokens - ) + global_num_tokens_pinned = torch.tensor(global_num_tokens, pin_memory=True) + self.global_num_tokens_gpu.copy_(global_num_tokens_pinned, non_blocking=True) if self.mrope_positions is not None: self.mrope_positions = self._pad_tensor_to_size(self.mrope_positions, bs)