Simplify tests & Fix trtllm custom allreduce registration (#4252)
This commit is contained in:
@@ -6,10 +6,12 @@ from typing import List, Tuple

 import torch
 import torch.library

-from sglang.srt.utils import is_hip, is_hpu
+from sglang.srt.utils import get_bool_env_var, is_hip, is_hpu

 logger = logging.getLogger(__name__)

-use_vllm_custom_allreduce = os.environ.get("USE_VLLM_CUSTOM_ALLREDUCE", default=True)
+use_vllm_custom_allreduce = get_bool_env_var(
+    "USE_VLLM_CUSTOM_ALLREDUCE", default="true"
+)

 if not is_hpu():
     # ROCm does not use vllm custom allreduce
@@ -22,7 +22,7 @@ from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_trito
 from sglang.srt.layers.dp_attention import get_attention_tp_size
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
 from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
-from sglang.srt.utils import is_flashinfer_available
+from sglang.srt.utils import get_bool_env_var, is_flashinfer_available

 if TYPE_CHECKING:
     from sglang.srt.layers.radix_attention import RadixAttention
@@ -48,6 +48,7 @@ from sglang.srt.model_loader.weight_utils import (
     safetensors_weights_iterator,
 )
 from sglang.srt.utils import (
+    get_bool_env_var,
     get_device_capability,
     is_pin_memory_available,
     set_weight_attrs,
@@ -197,7 +198,7 @@ class DefaultModelLoader(BaseModelLoader):

         Returns the path to the downloaded model, or None if the model is not
         downloaded from ModelScope."""
-        if os.environ.get("SGLANG_USE_MODELSCOPE", None) == "True":
+        if get_bool_env_var("SGLANG_USE_MODELSCOPE"):
            # download model from ModelScope hub,
            # lazy import so that modelscope is not required for normal use.
            # pylint: disable=C.
||||
Reference in New Issue
Block a user