FA3 Spec Decoding to support top k = 1 and add cuda graph support (#5050)

Co-authored-by: Qingquan Song <ustcsqq@gmail.com>
Co-authored-by: Chunan Zeng <zcnrex@gmail.com>
This commit is contained in:
Stefan He
2025-04-04 23:03:59 -07:00
committed by GitHub
parent 3f287b8579
commit ca8d02abd5
2 changed files with 270 additions and 119 deletions

View File

@@ -104,6 +104,9 @@ class ForwardMode(IntEnum):
or self == ForwardMode.IDLE
)
def is_extend_or_draft_extend(self):
    """Return True when this mode is EXTEND or DRAFT_EXTEND.

    The check is an equality comparison against the two
    ``ForwardMode`` members; every other mode yields False.
    """
    # EXTEND is tested first, mirroring the left-to-right evaluation
    # order of the equivalent ``or`` expression.
    if self == ForwardMode.EXTEND:
        return True
    return self == ForwardMode.DRAFT_EXTEND
def is_dummy_first(self):
    """Return True when this mode equals ``ForwardMode.DUMMY_FIRST``."""
    matches_dummy_first = self == ForwardMode.DUMMY_FIRST
    return matches_dummy_first