Add initial support for gpt-oss (#8824)

This commit is contained in:
Ying Sheng
2025-08-05 13:42:01 -07:00
committed by GitHub
parent 556e4143f0
commit c1d2061f97
12 changed files with 1595 additions and 47 deletions

View File

@@ -457,6 +457,10 @@ class ServerArgs:
raise ValueError(
"trtllm_mla backend does not support speculative decoding yet."
)
model_arch = self.get_hf_config().architectures[0]
if model_arch in ["GptOssForCausalLM"]:
self.attention_backend = "triton"
self.enable_triton_kernel_moe = True
# Set page size
if self.page_size is None: