FA3 Spec Decoding to support top k = 1 and add cuda graph support (#5050)
Co-authored-by: Qingquan Song <ustcsqq@gmail.com>
Co-authored-by: Chunan Zeng <zcnrex@gmail.com>
This commit is contained in:
@@ -104,6 +104,9 @@ class ForwardMode(IntEnum):
|
||||
or self == ForwardMode.IDLE
|
||||
)
|
||||
|
||||
def is_extend_or_draft_extend(self):
    """Return True when this mode is ``EXTEND`` or ``DRAFT_EXTEND``."""
    # Tuple membership compares with ``==`` against each candidate, which is
    # equivalent to the chained ``or`` of two equality checks.
    return self in (ForwardMode.EXTEND, ForwardMode.DRAFT_EXTEND)
|
||||
|
||||
def is_dummy_first(self):
    """Return True when this mode equals ``ForwardMode.DUMMY_FIRST``."""
    dummy_first = ForwardMode.DUMMY_FIRST
    return self == dummy_first
|
||||
|
||||
|
||||
Reference in New Issue
Block a user