Fix CUDA illegal memory access issues in speculative decoding (#10892)

This commit is contained in:
ur4t
2025-10-03 22:44:07 +08:00
committed by GitHub
parent fdc4e1e570
commit 6ae3f05b33
2 changed files with 2 additions and 0 deletions

View File

@@ -302,6 +302,7 @@ class EAGLEDraftCudaGraphRunner:
if bs != raw_bs:
self.seq_lens.fill_(self.seq_len_fill_value)
self.out_cache_loc.zero_()
self.positions.zero_()
num_tokens = bs * self.num_tokens_per_bs

View File

@@ -332,6 +332,7 @@ class EAGLEDraftExtendCudaGraphRunner:
if bs * self.num_tokens_per_bs != num_tokens:
self.seq_lens.fill_(self.seq_len_fill_value)
self.out_cache_loc.zero_()
self.positions.zero_()
self.accept_length.fill_(1)
self.extend_seq_lens.fill_(1)