Revert "Deprecate global_server_args_dict" (#11520)

2025-10-12 17:40:40 -07:00
parent 6cd296940a
commit 1bdd010291
54 changed files with 321 additions and 240 deletions
--- a/test/srt/test_gptqmodel_dynamic.py
+++ b/test/srt/test_gptqmodel_dynamic.py
@@ -4,7 +4,6 @@ import unittest
 import requests
 import torch

-from sglang.srt.server_args import set_global_server_args_for_scheduler
 from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -17,15 +16,17 @@ from sglang.test.test_utils import (
 def check_quant_method(model_path: str, use_marlin_kernel: bool):
    from sglang.srt.configs.device_config import DeviceConfig
    from sglang.srt.configs.load_config import LoadConfig
-    from sglang.srt.configs.model_config import ModelConfig
+    from sglang.srt.configs.model_config import AttentionArch, ModelConfig
    from sglang.srt.distributed import (
+        get_tp_group,
        init_distributed_environment,
        initialize_model_parallel,
+        set_custom_all_reduce,
    )
    from sglang.srt.distributed.parallel_state import monkey_patch_vllm_parallel_state
    from sglang.srt.layers.quantization.utils import get_dynamic_override
    from sglang.srt.model_loader import get_model
-    from sglang.srt.server_args import ServerArgs
+    from sglang.srt.server_args import PortArgs, ServerArgs

    try:
        init_distributed_environment(
@@ -42,7 +43,6 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
        pass

    server_args = ServerArgs(model_path=model_path, dtype=torch.float16)
-    set_global_server_args_for_scheduler(server_args)
    model_config = ModelConfig.from_server_args(server_args)

    load_config = LoadConfig()