Move deep gemm related arguments to sglang.srt.environ (#11547)

This commit is contained in:
Liangsheng Yin
2025-10-14 00:34:35 +08:00
committed by GitHub
parent bfadb5ea5f
commit acc2327bbd
20 changed files with 187 additions and 189 deletions

View File

@@ -4,6 +4,7 @@ from types import SimpleNamespace
import requests
from sglang.srt.environ import envs
from sglang.srt.utils import get_device_sm, kill_process_tree
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import (
@@ -49,18 +50,20 @@ class TestHybridAttnBackendBase(CustomTestCase):
def setUpClass(cls):
# disable deep gemm precompile to make the server launch faster
# please don't do this if you want to make your inference workload faster
os.environ["SGL_JIT_DEEPGEMM_PRECOMPILE"] = "false"
os.environ["SGL_ENABLE_JIT_DEEPGEMM"] = "false"
if cls.speculative_decode:
model = DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST
else:
model = cls.model
cls.process = popen_launch_server(
model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=cls.get_server_args(),
)
with (
envs.SGLANG_JIT_DEEPGEMM_PRECOMPILE.override(False),
envs.SGLANG_ENABLE_JIT_DEEPGEMM.override(False),
):
if cls.speculative_decode:
model = DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST
else:
model = cls.model
cls.process = popen_launch_server(
model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=cls.get_server_args(),
)
@classmethod
def tearDownClass(cls):