feat: add trtllm-gen mha from direct call (#8782)

Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
This commit is contained in:
eigen
2025-08-05 06:28:39 -04:00
committed by GitHub
parent 75df31b60e
commit 40e3b2beeb
4 changed files with 351 additions and 0 deletions

View File

@@ -1449,6 +1449,17 @@ class ModelRunner:
from sglang.srt.layers.attention.trtllm_mla_backend import TRTLLMMLABackend
return TRTLLMMLABackend(self)
elif self.server_args.attention_backend == "trtllm_mha":
if self.use_mla_backend:
raise ValueError(
"trtllm_mha backend can only be used with non-MLA models."
)
from sglang.srt.layers.attention.trtllm_mha_backend import (
TRTLLMHAAttnBackend,
)
return TRTLLMHAAttnBackend(self)
elif self.server_args.attention_backend == "intel_amx":
from sglang.srt.layers.attention.intel_amx_backend import (
IntelAMXAttnBackend,