From 8441baad6e3fee27f6b66a757eda5d831751ec5b Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Wed, 30 Apr 2025 19:49:26 -0700
Subject: [PATCH] fix: update model runner (#5934)

---
 python/sglang/srt/model_executor/model_runner.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 5537daf18..bf2c91080 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -81,7 +81,6 @@ from sglang.srt.utils import (
     get_available_gpu_memory,
     get_bool_env_var,
     init_custom_process_group,
-    is_ampere_with_cuda_12_3,
     is_cuda,
     is_fa3_default_architecture,
     is_flashinfer_available,
@@ -264,7 +263,7 @@ class ModelRunner:
             if not self.use_mla_backend:
                 # MHA architecture
                 if (
-                    (is_ampere_with_cuda_12_3() or is_hopper_with_cuda_12_3())
+                    is_hopper_with_cuda_12_3()
                     and is_no_spec_infer_or_topk_one(server_args)
                     and is_fa3_default_architecture(self.model_config.hf_config)
                 ):