Add initial support for gpt-oss (#8824)
This commit is contained in:
@@ -457,6 +457,10 @@ class ServerArgs:
|
||||
raise ValueError(
|
||||
"trtllm_mla backend does not support speculative decoding yet."
|
||||
)
|
||||
model_arch = self.get_hf_config().architectures[0]
|
||||
if model_arch in ["GptOssForCausalLM"]:
|
||||
self.attention_backend = "triton"
|
||||
self.enable_triton_kernel_moe = True
|
||||
|
||||
# Set page size
|
||||
if self.page_size is None:
|
||||
|
||||
Reference in New Issue
Block a user