Move deep gemm related arguments to sglang.srt.environ (#11547)
This commit is contained in:
@@ -4,6 +4,7 @@ from types import SimpleNamespace
|
||||
|
||||
import requests
|
||||
|
||||
from sglang.srt.environ import envs
|
||||
from sglang.srt.utils import get_device_sm, kill_process_tree
|
||||
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
||||
from sglang.test.test_utils import (
|
||||
@@ -49,18 +50,20 @@ class TestHybridAttnBackendBase(CustomTestCase):
|
||||
def setUpClass(cls):
|
||||
# disable deep gemm precompile to make launch server faster
|
||||
# please don't do this if you want to make your inference workload faster
|
||||
os.environ["SGL_JIT_DEEPGEMM_PRECOMPILE"] = "false"
|
||||
os.environ["SGL_ENABLE_JIT_DEEPGEMM"] = "false"
|
||||
if cls.speculative_decode:
|
||||
model = DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST
|
||||
else:
|
||||
model = cls.model
|
||||
cls.process = popen_launch_server(
|
||||
model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=cls.get_server_args(),
|
||||
)
|
||||
with (
|
||||
envs.SGLANG_JIT_DEEPGEMM_PRECOMPILE.override(False),
|
||||
envs.SGLANG_ENABLE_JIT_DEEPGEMM.override(False),
|
||||
):
|
||||
if cls.speculative_decode:
|
||||
model = DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST
|
||||
else:
|
||||
model = cls.model
|
||||
cls.process = popen_launch_server(
|
||||
model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=cls.get_server_args(),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
|
||||
Reference in New Issue
Block a user