diff --git a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py index e03626988..a6d5582c3 100644 --- a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +++ b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py @@ -302,6 +302,7 @@ class EAGLEDraftCudaGraphRunner: if bs != raw_bs: self.seq_lens.fill_(self.seq_len_fill_value) self.out_cache_loc.zero_() + self.positions.zero_() num_tokens = bs * self.num_tokens_per_bs diff --git a/python/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py b/python/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py index edb37db27..72f182ed9 100644 --- a/python/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +++ b/python/sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py @@ -332,6 +332,7 @@ class EAGLEDraftExtendCudaGraphRunner: if bs * self.num_tokens_per_bs != num_tokens: self.seq_lens.fill_(self.seq_len_fill_value) self.out_cache_loc.zero_() + self.positions.zero_() self.accept_length.fill_(1) self.extend_seq_lens.fill_(1)