Depreate global_server_args_dict (#11528)

2025-10-13 19:34:43 +08:00
parent 0b6f535f66
commit 516738b096
54 changed files with 240 additions and 321 deletions
--- a/test/srt/test_gptqmodel_dynamic.py
+++ b/test/srt/test_gptqmodel_dynamic.py
@@ -4,6 +4,7 @@ import unittest
 import requests
 import torch

+from sglang.srt.server_args import set_global_server_args_for_scheduler
 from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -16,17 +17,15 @@ from sglang.test.test_utils import (
 def check_quant_method(model_path: str, use_marlin_kernel: bool):
    from sglang.srt.configs.device_config import DeviceConfig
    from sglang.srt.configs.load_config import LoadConfig
-    from sglang.srt.configs.model_config import AttentionArch, ModelConfig
+    from sglang.srt.configs.model_config import ModelConfig
    from sglang.srt.distributed import (
-        get_tp_group,
        init_distributed_environment,
        initialize_model_parallel,
-        set_custom_all_reduce,
    )
    from sglang.srt.distributed.parallel_state import monkey_patch_vllm_parallel_state
    from sglang.srt.layers.quantization.utils import get_dynamic_override
    from sglang.srt.model_loader import get_model
-    from sglang.srt.server_args import PortArgs, ServerArgs
+    from sglang.srt.server_args import ServerArgs

    try:
        init_distributed_environment(
@@ -43,6 +42,7 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
        pass

    server_args = ServerArgs(model_path=model_path, dtype=torch.float16)
+    set_global_server_args_for_scheduler(server_args)
    model_config = ModelConfig.from_server_args(server_args)

    load_config = LoadConfig()