fix(typo): fix reply to replay in base_attn_backend.py (#4784)

This commit is contained in:
Thysrael
2025-03-26 15:19:12 +08:00
committed by GitHub
parent 26f07294f1
commit ced35a0649

View File

@@ -47,7 +47,7 @@ class AttentionBackend(ABC):
 spec_info: Optional[Union[EagleDraftInput, EagleVerifyInput]],
 seq_lens_cpu: Optional[torch.Tensor],
 ):
-"""Init the metadata for a forward pass for replying a cuda graph."""
+"""Init the metadata for a forward pass for replaying a cuda graph."""
 raise NotImplementedError()
 def get_cuda_graph_seq_len_fill_value(self):