Abstraction for spec worker and code cleanup (#11643)

This commit is contained in:
Liangsheng Yin
2025-10-17 23:31:36 +08:00
committed by GitHub
parent 3e4c7da2f5
commit cde5a6e30f
14 changed files with 707 additions and 461 deletions

View File

@@ -215,10 +215,10 @@ class GenerationBatchResult:
delay_sample_func: Optional[callable] = None
future_indices: Optional[FutureIndices] = None
-# FIXME(lsyin): maybe move to <BetterPlace> ?
+# FIXME(lsyin): maybe move to a better place?
# sync path: forward stream -> output processor
accept_lens: Optional[torch.Tensor] = None
-last_batch_allocate_lens: Optional[torch.Tensor] = None
+allocate_lens: Optional[torch.Tensor] = None
# relay path: forward stream -> next step forward
next_draft_input: Optional[EagleDraftInput] = None
@@ -246,10 +246,8 @@ class GenerationBatchResult:
if self.accept_lens is not None:
self.accept_lens = self.accept_lens.to("cpu", non_blocking=True)
-if self.last_batch_allocate_lens is not None:
-self.last_batch_allocate_lens = self.last_batch_allocate_lens.to(
-"cpu", non_blocking=True
-)
+if self.allocate_lens is not None:
+self.allocate_lens = self.allocate_lens.to("cpu", non_blocking=True)
self.copy_done.record()