Co-authored-by: averyhuang <averyh@nvidia.com>
This commit is contained in:
@@ -1443,13 +1443,13 @@ class ModelRunner:
|
||||
)
|
||||
|
||||
return CutlassMLABackend(self)
|
||||
elif self.server_args.attention_backend == "trtllm_mla":
|
||||
elif backend_str == "trtllm_mla":
|
||||
if not self.use_mla_backend:
|
||||
raise ValueError("trtllm_mla backend can only be used with MLA models.")
|
||||
from sglang.srt.layers.attention.trtllm_mla_backend import TRTLLMMLABackend
|
||||
|
||||
return TRTLLMMLABackend(self)
|
||||
elif self.server_args.attention_backend == "trtllm_mha":
|
||||
elif backend_str == "trtllm_mha":
|
||||
if self.use_mla_backend:
|
||||
raise ValueError(
|
||||
"trtllm_mha backend can only be used with non-MLA models."
|
||||
@@ -1460,7 +1460,7 @@ class ModelRunner:
|
||||
|
||||
return TRTLLMHAAttnBackend(self)
|
||||
|
||||
elif self.server_args.attention_backend == "intel_amx":
|
||||
elif backend_str == "intel_amx":
|
||||
from sglang.srt.layers.attention.intel_amx_backend import (
|
||||
IntelAMXAttnBackend,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user