DP Attention with Auto DeepEP Dispatch (#7222)

This commit is contained in:
Cheng Wan
2025-07-05 01:54:24 -07:00
committed by GitHub
parent 75354d9ae9
commit 8fc910db03
13 changed files with 136 additions and 90 deletions

View File

@@ -2202,14 +2202,14 @@ class DeepEPMode(Enum):
def enable_low_latency(self):
    """Return True when this mode is ``low_latency`` or ``auto``."""
    return self in (DeepEPMode.low_latency, DeepEPMode.auto)
def resolve(self, is_extend_in_batch: bool):
    """Pick the concrete mode to use for a batch.

    Any mode other than ``auto`` is returned unchanged. ``auto`` becomes
    ``normal`` when ``is_extend_in_batch`` is true and ``low_latency``
    otherwise.

    Args:
        is_extend_in_batch: Flag selecting ``normal`` (true) versus
            ``low_latency`` (false) when this mode is ``auto``.

    Returns:
        A ``DeepEPMode`` member; never ``auto`` unless ``self`` compares
        unequal to ``DeepEPMode.auto`` while being a distinct member.
    """
    # Explicitly configured modes are honoured as-is.
    if self != DeepEPMode.auto:
        return self
    if is_extend_in_batch:
        return DeepEPMode.normal
    return DeepEPMode.low_latency
def is_non_idle_and_non_empty(forward_mode, hidden_states):