Simplify tests & Fix trtllm custom allreduce registration (#4252)
This commit is contained in:
@@ -6,10 +6,12 @@ from typing import List, Tuple

 import torch
 import torch.library

-from sglang.srt.utils import is_hip, is_hpu
+from sglang.srt.utils import get_bool_env_var, is_hip, is_hpu

 logger = logging.getLogger(__name__)

-use_vllm_custom_allreduce = os.environ.get("USE_VLLM_CUSTOM_ALLREDUCE", default=True)
+use_vllm_custom_allreduce = get_bool_env_var(
+    "USE_VLLM_CUSTOM_ALLREDUCE", default="true"
+)

 if not is_hpu():
     # ROCm does not use vllm custom allreduce
@@ -22,7 +22,7 @@ from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_trito
 from sglang.srt.layers.dp_attention import get_attention_tp_size
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
 from sglang.srt.speculative.eagle_utils import EagleDraftInput, EagleVerifyInput
-from sglang.srt.utils import is_flashinfer_available
+from sglang.srt.utils import get_bool_env_var, is_flashinfer_available

 if TYPE_CHECKING:
     from sglang.srt.layers.radix_attention import RadixAttention
@@ -48,6 +48,7 @@ from sglang.srt.model_loader.weight_utils import (
     safetensors_weights_iterator,
 )
 from sglang.srt.utils import (
+    get_bool_env_var,
     get_device_capability,
     is_pin_memory_available,
     set_weight_attrs,
@@ -197,7 +198,7 @@ class DefaultModelLoader(BaseModelLoader):

         Returns the path to the downloaded model, or None if the model is not
         downloaded from ModelScope."""
-        if os.environ.get("SGLANG_USE_MODELSCOPE", None) == "True":
+        if get_bool_env_var("SGLANG_USE_MODELSCOPE"):
            # download model from ModelScope hub,
            # lazy import so that modelscope is not required for normal use.
            # pylint: disable=C.
||||
Reference in New Issue
Block a user