Support sliding window in triton backend (#6509)

This commit is contained in:
Jianan Ji
2025-05-30 04:11:53 -04:00
committed by GitHub
parent d279d4990c
commit 22630ca242
6 changed files with 350 additions and 13 deletions

View File

@@ -1025,10 +1025,6 @@ class ModelRunner:
return AiterAttnBackend(self)
elif self.server_args.attention_backend == "triton":
assert self.sliding_window_size is None, (
"Window attention is not supported in the triton attention backend. "
"Please use `--attention-backend flashinfer`."
)
assert not self.model_config.is_encoder_decoder, (
"Cross attention is not supported in the triton attention backend. "
"Please use `--attention-backend flashinfer`."