[Fix] DeepEP Compatibility with Low Latency (#5068)
Co-authored-by: ch-wan <cwan39@gatech.edu>
This commit is contained in:
@@ -72,7 +72,7 @@ class ForwardMode(IntEnum):
|
||||
DUMMY_FIRST = auto()
|
||||
|
||||
def is_prefill(self):
|
||||
- return self == ForwardMode.PREFILL
|
||||
+ return self.is_extend()
|
||||
|
||||
def is_extend(self):
|
||||
return (
|
||||
|
||||
Reference in New Issue
Block a user