Abstraction for spec worker and code cleanup (#11643)
This commit is contained in:
@@ -215,10 +215,10 @@ class GenerationBatchResult:
     delay_sample_func: Optional[callable] = None
     future_indices: Optional[FutureIndices] = None
 
-    # FIXME(lsyin): maybe move to <BetterPlace> ?
+    # FIXME(lsyin): maybe move to a better place?
     # sync path: forward stream -> output processor
     accept_lens: Optional[torch.Tensor] = None
-    last_batch_allocate_lens: Optional[torch.Tensor] = None
+    allocate_lens: Optional[torch.Tensor] = None
 
     # relay path: forward stream -> next step forward
     next_draft_input: Optional[EagleDraftInput] = None
@@ -246,10 +246,8 @@ class GenerationBatchResult:
         if self.accept_lens is not None:
             self.accept_lens = self.accept_lens.to("cpu", non_blocking=True)
 
-        if self.last_batch_allocate_lens is not None:
-            self.last_batch_allocate_lens = self.last_batch_allocate_lens.to(
-                "cpu", non_blocking=True
-            )
+        if self.allocate_lens is not None:
+            self.allocate_lens = self.allocate_lens.to("cpu", non_blocking=True)
 
         self.copy_done.record()
 
||||
Reference in New Issue
Block a user