From 8441baad6e3fee27f6b66a757eda5d831751ec5b Mon Sep 17 00:00:00 2001
From: Yineng Zhang <me@zhyncs.com>
Date: Wed, 30 Apr 2025 19:49:26 -0700
Subject: [PATCH] fix: drop Ampere (CUDA 12.3) from the model runner's MHA FA3 check (#5934)

---
 python/sglang/srt/model_executor/model_runner.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 5537daf18..bf2c91080 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -81,7 +81,6 @@ from sglang.srt.utils import (
     get_available_gpu_memory,
     get_bool_env_var,
     init_custom_process_group,
-    is_ampere_with_cuda_12_3,
     is_cuda,
     is_fa3_default_architecture,
     is_flashinfer_available,
@@ -264,7 +263,7 @@ class ModelRunner:
             if not self.use_mla_backend:
                 # MHA architecture
                 if (
-                    (is_ampere_with_cuda_12_3() or is_hopper_with_cuda_12_3())
+                    is_hopper_with_cuda_12_3()
                     and is_no_spec_infer_or_topk_one(server_args)
                     and is_fa3_default_architecture(self.model_config.hf_config)
                 ):