Tiny move files to utils folder (#11166)
This commit is contained in:
@@ -8,7 +8,7 @@ from datasets import load_dataset
|
|||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.global_config import global_config
|
from sglang.global_config import global_config
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
add_common_sglang_args_and_parse,
|
add_common_sglang_args_and_parse,
|
||||||
select_sglang_backend,
|
select_sglang_backend,
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from pathlib import Path
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
add_common_sglang_args_and_parse,
|
add_common_sglang_args_and_parse,
|
||||||
select_sglang_backend,
|
select_sglang_backend,
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ This example demonstrates how to provide tokenized ids to LLM as input instead o
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
|
|
||||||
MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
|
MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ python token_in_token_out_llm_server.py
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import is_in_ci
|
from sglang.test.test_utils import is_in_ci
|
||||||
from sglang.utils import terminate_process, wait_for_server
|
from sglang.utils import terminate_process, wait_for_server
|
||||||
|
|
||||||
|
|||||||
@@ -60,7 +60,6 @@ import torch.distributed as dist
|
|||||||
from sglang.srt.configs.model_config import ModelConfig
|
from sglang.srt.configs.model_config import ModelConfig
|
||||||
from sglang.srt.distributed.parallel_state import destroy_distributed_environment
|
from sglang.srt.distributed.parallel_state import destroy_distributed_environment
|
||||||
from sglang.srt.entrypoints.engine import _set_envs_and_config
|
from sglang.srt.entrypoints.engine import _set_envs_and_config
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.layers.moe import initialize_moe_config
|
from sglang.srt.layers.moe import initialize_moe_config
|
||||||
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
|
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
|
||||||
from sglang.srt.managers.scheduler import Scheduler
|
from sglang.srt.managers.scheduler import Scheduler
|
||||||
@@ -78,6 +77,7 @@ from sglang.srt.utils import (
|
|||||||
set_gpu_proc_affinity,
|
set_gpu_proc_affinity,
|
||||||
suppress_other_loggers,
|
suppress_other_loggers,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
|
|||||||
@@ -635,7 +635,7 @@ def get_tokenizer(
|
|||||||
if pretrained_model_name_or_path.endswith(
|
if pretrained_model_name_or_path.endswith(
|
||||||
".json"
|
".json"
|
||||||
) or pretrained_model_name_or_path.endswith(".model"):
|
) or pretrained_model_name_or_path.endswith(".model"):
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
|
|
||||||
return get_tokenizer(pretrained_model_name_or_path)
|
return get_tokenizer(pretrained_model_name_or_path)
|
||||||
|
|
||||||
|
|||||||
@@ -433,7 +433,7 @@ class Runtime:
|
|||||||
self.endpoint.cache_prefix(prefix)
|
self.endpoint.cache_prefix(prefix)
|
||||||
|
|
||||||
def get_tokenizer(self):
|
def get_tokenizer(self):
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
|
|
||||||
return get_tokenizer(
|
return get_tokenizer(
|
||||||
self.server_args.tokenizer_path,
|
self.server_args.tokenizer_path,
|
||||||
|
|||||||
@@ -23,16 +23,16 @@ import torch
|
|||||||
from transformers import PretrainedConfig
|
from transformers import PretrainedConfig
|
||||||
|
|
||||||
from sglang.srt.environ import envs
|
from sglang.srt.environ import envs
|
||||||
from sglang.srt.hf_transformers_utils import (
|
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
|
||||||
|
from sglang.srt.server_args import ServerArgs
|
||||||
|
from sglang.srt.utils import is_hip, retry
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import (
|
||||||
get_config,
|
get_config,
|
||||||
get_context_length,
|
get_context_length,
|
||||||
get_generation_config,
|
get_generation_config,
|
||||||
get_hf_text_config,
|
get_hf_text_config,
|
||||||
get_sparse_attention_config,
|
get_sparse_attention_config,
|
||||||
)
|
)
|
||||||
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
|
|
||||||
from sglang.srt.server_args import ServerArgs
|
|
||||||
from sglang.srt.utils import is_hip, retry
|
|
||||||
from sglang.utils import is_in_ci
|
from sglang.utils import is_in_ci
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -26,12 +26,12 @@ import torch
|
|||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from sglang.srt.configs.load_config import LoadConfig
|
from sglang.srt.configs.load_config import LoadConfig
|
||||||
from sglang.srt.hf_transformers_utils import AutoConfig
|
|
||||||
from sglang.srt.lora.backend.base_backend import BaseLoRABackend
|
from sglang.srt.lora.backend.base_backend import BaseLoRABackend
|
||||||
from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
|
from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
|
||||||
from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
|
from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
|
||||||
from sglang.srt.lora.lora_config import LoRAConfig
|
from sglang.srt.lora.lora_config import LoRAConfig
|
||||||
from sglang.srt.model_loader.loader import DefaultModelLoader
|
from sglang.srt.model_loader.loader import DefaultModelLoader
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import AutoConfig
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.configs.load_config import LoadConfig
|
from sglang.srt.configs.load_config import LoadConfig
|
||||||
from sglang.srt.hf_transformers_utils import AutoConfig
|
|
||||||
from sglang.srt.lora.backend.base_backend import BaseLoRABackend, get_backend_from_name
|
from sglang.srt.lora.backend.base_backend import BaseLoRABackend, get_backend_from_name
|
||||||
from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer
|
from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer
|
||||||
from sglang.srt.lora.lora import LoRAAdapter
|
from sglang.srt.lora.lora import LoRAAdapter
|
||||||
@@ -39,6 +38,7 @@ from sglang.srt.managers.io_struct import LoRAUpdateOutput
|
|||||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
from sglang.srt.utils import replace_submodule
|
from sglang.srt.utils import replace_submodule
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import AutoConfig
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.distributed import divide
|
from sglang.srt.distributed import divide
|
||||||
from sglang.srt.hf_transformers_utils import AutoConfig
|
|
||||||
from sglang.srt.lora.layers import BaseLayerWithLoRA
|
from sglang.srt.lora.layers import BaseLayerWithLoRA
|
||||||
from sglang.srt.lora.lora import LoRAAdapter
|
from sglang.srt.lora.lora import LoRAAdapter
|
||||||
from sglang.srt.lora.lora_config import LoRAConfig
|
from sglang.srt.lora.lora_config import LoRAConfig
|
||||||
@@ -17,6 +16,7 @@ from sglang.srt.lora.utils import (
|
|||||||
get_stacked_multiply,
|
get_stacked_multiply,
|
||||||
get_target_module_name,
|
get_target_module_name,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import AutoConfig
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from typing import Iterable, Optional, Set, Tuple
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import AutoConfig
|
from sglang.srt.utils.hf_transformers_utils import AutoConfig
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ import psutil
|
|||||||
import setproctitle
|
import setproctitle
|
||||||
import zmq
|
import zmq
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.managers.io_struct import (
|
from sglang.srt.managers.io_struct import (
|
||||||
BatchEmbeddingOutput,
|
BatchEmbeddingOutput,
|
||||||
BatchMultimodalDecodeReq,
|
BatchMultimodalDecodeReq,
|
||||||
@@ -42,6 +41,7 @@ from sglang.srt.utils import (
|
|||||||
get_zmq_socket,
|
get_zmq_socket,
|
||||||
kill_itself_when_parent_died,
|
kill_itself_when_parent_died,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.utils import (
|
from sglang.utils import (
|
||||||
TypeBasedDispatcher,
|
TypeBasedDispatcher,
|
||||||
find_printable_text,
|
find_printable_text,
|
||||||
|
|||||||
@@ -60,11 +60,6 @@ from sglang.srt.disaggregation.utils import (
|
|||||||
)
|
)
|
||||||
from sglang.srt.distributed import get_pp_group, get_world_group
|
from sglang.srt.distributed import get_pp_group, get_world_group
|
||||||
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
|
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
|
||||||
from sglang.srt.hf_transformers_utils import (
|
|
||||||
get_processor,
|
|
||||||
get_tokenizer,
|
|
||||||
get_tokenizer_from_processor,
|
|
||||||
)
|
|
||||||
from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
|
from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
|
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
|
||||||
from sglang.srt.layers.moe import initialize_moe_config
|
from sglang.srt.layers.moe import initialize_moe_config
|
||||||
@@ -190,6 +185,11 @@ from sglang.srt.utils import (
|
|||||||
set_random_seed,
|
set_random_seed,
|
||||||
suppress_other_loggers,
|
suppress_other_loggers,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import (
|
||||||
|
get_processor,
|
||||||
|
get_tokenizer,
|
||||||
|
get_tokenizer_from_processor,
|
||||||
|
)
|
||||||
from sglang.utils import TypeBasedDispatcher, get_exception_traceback
|
from sglang.utils import TypeBasedDispatcher, get_exception_traceback
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from enum import Enum, auto
|
|||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
from sglang.srt.managers.io_struct import BlockReqInput, BlockReqType
|
from sglang.srt.managers.io_struct import BlockReqInput, BlockReqType
|
||||||
from sglang.srt.poll_based_barrier import PollBasedBarrier
|
from sglang.srt.utils.poll_based_barrier import PollBasedBarrier
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -204,7 +204,7 @@ class SchedulerProfilerMixin:
|
|||||||
|
|
||||||
torch.distributed.barrier(self.tp_cpu_group)
|
torch.distributed.barrier(self.tp_cpu_group)
|
||||||
if self.tp_rank == 0:
|
if self.tp_rank == 0:
|
||||||
from sglang.srt.rpd_utils import rpd_to_chrome_trace
|
from sglang.srt.utils.rpd_utils import rpd_to_chrome_trace
|
||||||
|
|
||||||
rpd_to_chrome_trace("trace.rpd", self.rpd_profile_path)
|
rpd_to_chrome_trace("trace.rpd", self.rpd_profile_path)
|
||||||
self.rpd_profiler = None
|
self.rpd_profiler = None
|
||||||
|
|||||||
@@ -43,11 +43,6 @@ from fastapi import BackgroundTasks
|
|||||||
from sglang.srt.aio_rwlock import RWLock
|
from sglang.srt.aio_rwlock import RWLock
|
||||||
from sglang.srt.configs.model_config import ModelConfig
|
from sglang.srt.configs.model_config import ModelConfig
|
||||||
from sglang.srt.disaggregation.utils import DisaggregationMode
|
from sglang.srt.disaggregation.utils import DisaggregationMode
|
||||||
from sglang.srt.hf_transformers_utils import (
|
|
||||||
get_processor,
|
|
||||||
get_tokenizer,
|
|
||||||
get_tokenizer_from_processor,
|
|
||||||
)
|
|
||||||
from sglang.srt.lora.lora_registry import LoRARegistry
|
from sglang.srt.lora.lora_registry import LoRARegistry
|
||||||
from sglang.srt.managers.async_dynamic_batch_tokenizer import AsyncDynamicbatchTokenizer
|
from sglang.srt.managers.async_dynamic_batch_tokenizer import AsyncDynamicbatchTokenizer
|
||||||
from sglang.srt.managers.disagg_service import start_disagg_service
|
from sglang.srt.managers.disagg_service import start_disagg_service
|
||||||
@@ -99,6 +94,11 @@ from sglang.srt.utils import (
|
|||||||
get_zmq_socket,
|
get_zmq_socket,
|
||||||
kill_process_tree,
|
kill_process_tree,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import (
|
||||||
|
get_processor,
|
||||||
|
get_tokenizer,
|
||||||
|
get_tokenizer_from_processor,
|
||||||
|
)
|
||||||
from sglang.utils import TypeBasedDispatcher, get_exception_traceback
|
from sglang.utils import TypeBasedDispatcher, get_exception_traceback
|
||||||
|
|
||||||
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
||||||
|
|||||||
@@ -22,11 +22,6 @@ import torch
|
|||||||
|
|
||||||
from sglang.srt.configs.model_config import ModelConfig
|
from sglang.srt.configs.model_config import ModelConfig
|
||||||
from sglang.srt.distributed import get_pp_group, get_world_group
|
from sglang.srt.distributed import get_pp_group, get_world_group
|
||||||
from sglang.srt.hf_transformers_utils import (
|
|
||||||
get_processor,
|
|
||||||
get_tokenizer,
|
|
||||||
get_tokenizer_from_processor,
|
|
||||||
)
|
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
|
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
|
||||||
from sglang.srt.managers.io_struct import (
|
from sglang.srt.managers.io_struct import (
|
||||||
DestroyWeightsUpdateGroupReqInput,
|
DestroyWeightsUpdateGroupReqInput,
|
||||||
@@ -49,9 +44,14 @@ from sglang.srt.model_executor.forward_batch_info import (
|
|||||||
PPProxyTensors,
|
PPProxyTensors,
|
||||||
)
|
)
|
||||||
from sglang.srt.model_executor.model_runner import ModelRunner
|
from sglang.srt.model_executor.model_runner import ModelRunner
|
||||||
from sglang.srt.patch_torch import monkey_patch_torch_reductions
|
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
from sglang.srt.utils import MultiprocessingSerializer, broadcast_pyobj, set_random_seed
|
from sglang.srt.utils import MultiprocessingSerializer, broadcast_pyobj, set_random_seed
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import (
|
||||||
|
get_processor,
|
||||||
|
get_tokenizer,
|
||||||
|
get_tokenizer_from_processor,
|
||||||
|
)
|
||||||
|
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from sglang.srt.managers.cache_controller import LayerDoneCounter
|
from sglang.srt.managers.cache_controller import LayerDoneCounter
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ from sglang.srt.model_executor.forward_batch_info import (
|
|||||||
ForwardMode,
|
ForwardMode,
|
||||||
PPProxyTensors,
|
PPProxyTensors,
|
||||||
)
|
)
|
||||||
from sglang.srt.patch_torch import monkey_patch_torch_compile
|
|
||||||
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
log_info_on_rank0,
|
log_info_on_rank0,
|
||||||
@@ -43,6 +42,7 @@ from sglang.srt.utils import (
|
|||||||
require_mlp_sync,
|
require_mlp_sync,
|
||||||
require_mlp_tp_gather,
|
require_mlp_tp_gather,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -48,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import (
|
|||||||
PPProxyTensors,
|
PPProxyTensors,
|
||||||
enable_num_token_non_padded,
|
enable_num_token_non_padded,
|
||||||
)
|
)
|
||||||
from sglang.srt.patch_torch import monkey_patch_torch_compile
|
|
||||||
from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
|
from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
empty_context,
|
empty_context,
|
||||||
@@ -62,6 +61,7 @@ from sglang.srt.utils import (
|
|||||||
require_mlp_sync,
|
require_mlp_sync,
|
||||||
require_mlp_tp_gather,
|
require_mlp_tp_gather,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
|
||||||
|
|
||||||
_is_hip = is_hip()
|
_is_hip = is_hip()
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,6 @@ from typing import List, Optional, Tuple, Union
|
|||||||
import torch
|
import torch
|
||||||
import torch.distributed as dist
|
import torch.distributed as dist
|
||||||
|
|
||||||
from sglang.srt import slow_rank_detector
|
|
||||||
from sglang.srt.configs.device_config import DeviceConfig
|
from sglang.srt.configs.device_config import DeviceConfig
|
||||||
from sglang.srt.configs.load_config import LoadConfig, LoadFormat
|
from sglang.srt.configs.load_config import LoadConfig, LoadFormat
|
||||||
from sglang.srt.configs.model_config import AttentionArch, ModelConfig
|
from sglang.srt.configs.model_config import AttentionArch, ModelConfig
|
||||||
@@ -115,7 +114,6 @@ from sglang.srt.offloader import (
|
|||||||
get_offloader,
|
get_offloader,
|
||||||
set_offloader,
|
set_offloader,
|
||||||
)
|
)
|
||||||
from sglang.srt.patch_torch import monkey_patch_torch_reductions
|
|
||||||
from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
|
from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
||||||
@@ -140,7 +138,9 @@ from sglang.srt.utils import (
|
|||||||
monkey_patch_p2p_access_check,
|
monkey_patch_p2p_access_check,
|
||||||
monkey_patch_vllm_gguf_config,
|
monkey_patch_vllm_gguf_config,
|
||||||
set_cuda_arch,
|
set_cuda_arch,
|
||||||
|
slow_rank_detector,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
|
||||||
from sglang.srt.weight_sync.tensor_bucket import (
|
from sglang.srt.weight_sync.tensor_bucket import (
|
||||||
FlattenedTensorBucket,
|
FlattenedTensorBucket,
|
||||||
FlattenedTensorMetadata,
|
FlattenedTensorMetadata,
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ import torch.nn as nn
|
|||||||
from transformers.activations import ACT2FN
|
from transformers.activations import ACT2FN
|
||||||
|
|
||||||
from sglang.srt.configs import DotsOCRConfig
|
from sglang.srt.configs import DotsOCRConfig
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessor
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||||
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
||||||
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
|
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
|
||||||
@@ -23,6 +22,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
|
|||||||
from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer
|
from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer
|
||||||
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
|
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,6 @@ import torch
|
|||||||
from torch import nn
|
from torch import nn
|
||||||
from transformers import Gemma3Config, PreTrainedModel
|
from transformers import Gemma3Config, PreTrainedModel
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.layernorm import Gemma3RMSNorm
|
from sglang.srt.layers.layernorm import Gemma3RMSNorm
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessor
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||||
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
from sglang.srt.layers.quantization.base_config import QuantizationConfig
|
||||||
@@ -44,6 +43,7 @@ from sglang.srt.model_loader.weight_utils import (
|
|||||||
from sglang.srt.models.gemma3_causal import Gemma3ForCausalLM
|
from sglang.srt.models.gemma3_causal import Gemma3ForCausalLM
|
||||||
from sglang.srt.models.siglip import SiglipVisionModel
|
from sglang.srt.models.siglip import SiglipVisionModel
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ from transformers import (
|
|||||||
)
|
)
|
||||||
from transformers.models.auto.modeling_auto import AutoModel
|
from transformers.models.auto.modeling_auto import AutoModel
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.layernorm import RMSNorm
|
from sglang.srt.layers.layernorm import RMSNorm
|
||||||
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
|
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessor
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||||
@@ -38,6 +37,7 @@ from sglang.srt.model_loader.weight_utils import (
|
|||||||
from sglang.srt.models.gemma3n_audio import Gemma3nAudioEncoder
|
from sglang.srt.models.gemma3n_audio import Gemma3nAudioEncoder
|
||||||
from sglang.srt.models.gemma3n_causal import Gemma3nRMSNorm, Gemma3nTextModel
|
from sglang.srt.models.gemma3n_causal import Gemma3nRMSNorm, Gemma3nTextModel
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import torch.nn as nn
|
|||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from transformers.models.glm4v.configuration_glm4v import Glm4vConfig, Glm4vVisionConfig
|
from transformers.models.glm4v.configuration_glm4v import Glm4vConfig, Glm4vVisionConfig
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.activation import SiluAndMul
|
from sglang.srt.layers.activation import SiluAndMul
|
||||||
from sglang.srt.layers.attention import vision_utils
|
from sglang.srt.layers.attention import vision_utils
|
||||||
from sglang.srt.layers.layernorm import RMSNorm
|
from sglang.srt.layers.layernorm import RMSNorm
|
||||||
@@ -28,6 +27,7 @@ from sglang.srt.models.qwen2_5_vl import (
|
|||||||
Qwen2_5_VLForConditionalGeneration,
|
Qwen2_5_VLForConditionalGeneration,
|
||||||
)
|
)
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ from sglang.srt.distributed import (
|
|||||||
get_moe_expert_parallel_world_size,
|
get_moe_expert_parallel_world_size,
|
||||||
get_tensor_model_parallel_world_size,
|
get_tensor_model_parallel_world_size,
|
||||||
)
|
)
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.attention import vision_utils
|
from sglang.srt.layers.attention import vision_utils
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessor
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||||
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
|
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
|
||||||
@@ -22,6 +21,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
|
|||||||
from sglang.srt.models.glm4_moe import Glm4MoeModel
|
from sglang.srt.models.glm4_moe import Glm4MoeModel
|
||||||
from sglang.srt.models.glm4v import Glm4vForConditionalGeneration, Glm4vVisionModel
|
from sglang.srt.models.glm4v import Glm4vForConditionalGeneration, Glm4vVisionModel
|
||||||
from sglang.srt.utils import add_prefix, is_cuda, log_info_on_rank0
|
from sglang.srt.utils import add_prefix, is_cuda, log_info_on_rank0
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
_is_cuda = is_cuda()
|
_is_cuda = is_cuda()
|
||||||
|
|
||||||
|
|||||||
@@ -40,7 +40,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
|
|||||||
Qwen2_5_VisionRotaryEmbedding,
|
Qwen2_5_VisionRotaryEmbedding,
|
||||||
)
|
)
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.attention.vision import VisionAttention
|
from sglang.srt.layers.attention.vision import VisionAttention
|
||||||
from sglang.srt.layers.layernorm import RMSNorm
|
from sglang.srt.layers.layernorm import RMSNorm
|
||||||
from sglang.srt.layers.linear import (
|
from sglang.srt.layers.linear import (
|
||||||
@@ -61,6 +60,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
|||||||
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
||||||
from sglang.srt.models.qwen2 import Qwen2Model
|
from sglang.srt.models.qwen2 import Qwen2Model
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,6 @@ from transformers.models.qwen2_audio.modeling_qwen2_audio import (
|
|||||||
Qwen2AudioMultiModalProjector,
|
Qwen2AudioMultiModalProjector,
|
||||||
)
|
)
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.activation import QuickGELU
|
from sglang.srt.layers.activation import QuickGELU
|
||||||
from sglang.srt.layers.attention.vision import VisionAttention
|
from sglang.srt.layers.attention.vision import VisionAttention
|
||||||
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
|
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
|
||||||
@@ -61,6 +60,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
|||||||
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
||||||
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
|
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,6 @@ from einops import rearrange
|
|||||||
from transformers import Qwen2VLConfig
|
from transformers import Qwen2VLConfig
|
||||||
from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
|
from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.activation import QuickGELU
|
from sglang.srt.layers.activation import QuickGELU
|
||||||
from sglang.srt.layers.attention.vision import VisionAttention
|
from sglang.srt.layers.attention.vision import VisionAttention
|
||||||
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
|
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
|
||||||
@@ -50,6 +49,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
|||||||
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
||||||
from sglang.srt.models.qwen2 import Qwen2Model
|
from sglang.srt.models.qwen2 import Qwen2Model
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from sglang.srt.configs.qwen3_vl import Qwen3VLConfig, Qwen3VLVisionConfig
|
from sglang.srt.configs.qwen3_vl import Qwen3VLConfig, Qwen3VLVisionConfig
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.attention.vision import VisionAttention
|
from sglang.srt.layers.attention.vision import VisionAttention
|
||||||
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
|
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessor
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||||
@@ -45,6 +44,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
|
|||||||
from sglang.srt.models.qwen2_vl import Qwen2VLVideoInputs
|
from sglang.srt.models.qwen2_vl import Qwen2VLVideoInputs
|
||||||
from sglang.srt.models.qwen3 import Qwen3Model
|
from sglang.srt.models.qwen3 import Qwen3Model
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ from sglang.srt.distributed import (
|
|||||||
get_pp_group,
|
get_pp_group,
|
||||||
get_tensor_model_parallel_rank,
|
get_tensor_model_parallel_rank,
|
||||||
)
|
)
|
||||||
from sglang.srt.hf_transformers_utils import get_processor
|
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessor
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||||
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
|
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
|
||||||
from sglang.srt.layers.pooler import Pooler, PoolingType
|
from sglang.srt.layers.pooler import Pooler, PoolingType
|
||||||
@@ -58,6 +57,7 @@ from sglang.srt.models.qwen3_vl import (
|
|||||||
Qwen3VLForConditionalGeneration,
|
Qwen3VLForConditionalGeneration,
|
||||||
)
|
)
|
||||||
from sglang.srt.utils import add_prefix
|
from sglang.srt.utils import add_prefix
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_processor
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ from typing import List, Literal, Optional, Union
|
|||||||
|
|
||||||
from sglang.srt.connector import ConnectorType
|
from sglang.srt.connector import ConnectorType
|
||||||
from sglang.srt.function_call.function_call_parser import FunctionCallParser
|
from sglang.srt.function_call.function_call_parser import FunctionCallParser
|
||||||
from sglang.srt.hf_transformers_utils import check_gguf_file, get_config
|
|
||||||
from sglang.srt.lora.lora_registry import LoRARef
|
from sglang.srt.lora.lora_registry import LoRARef
|
||||||
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
@@ -47,6 +46,7 @@ from sglang.srt.utils import (
|
|||||||
nullable_str,
|
nullable_str,
|
||||||
parse_connector_type,
|
parse_connector_type,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import check_gguf_file, get_config
|
||||||
from sglang.utils import is_in_ci
|
from sglang.utils import is_in_ci
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
2
python/sglang/srt/utils/__init__.py
Normal file
2
python/sglang/srt/utils/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# Temporarily do this to avoid changing all imports in the repo
|
||||||
|
from .common import *
|
||||||
@@ -33,7 +33,7 @@ async def update_weights(
|
|||||||
"""
|
"""
|
||||||
infer_tp_size = device_mesh[device_mesh_key].mesh.size()[0]
|
infer_tp_size = device_mesh[device_mesh_key].mesh.size()[0]
|
||||||
infer_tp_rank = device_mesh[device_mesh_key].get_local_rank()
|
infer_tp_rank = device_mesh[device_mesh_key].get_local_rank()
|
||||||
from sglang.srt.patch_torch import monkey_patch_torch_reductions
|
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
|
||||||
|
|
||||||
monkey_patch_torch_reductions()
|
monkey_patch_torch_reductions()
|
||||||
|
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ from transformers import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from sglang.srt.entrypoints.engine import Engine
|
from sglang.srt.entrypoints.engine import Engine
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import load_image
|
from sglang.srt.utils import load_image
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER, calculate_rouge_l
|
from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER, calculate_rouge_l
|
||||||
|
|
||||||
DEFAULT_PROMPTS = [
|
DEFAULT_PROMPTS = [
|
||||||
|
|||||||
@@ -551,7 +551,7 @@ def test_gen_min_new_tokens():
|
|||||||
We verify that the number of tokens in the answer is >= the min_tokens threshold.
|
We verify that the number of tokens in the answer is >= the min_tokens threshold.
|
||||||
"""
|
"""
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
|
|
||||||
model_path = sgl.global_config.default_backend.endpoint.get_model_name()
|
model_path = sgl.global_config.default_backend.endpoint.get_model_name()
|
||||||
MIN_TOKENS, MAX_TOKENS = 64, 128
|
MIN_TOKENS, MAX_TOKENS = 64, 128
|
||||||
|
|||||||
@@ -921,7 +921,7 @@ def run_score_benchmark(
|
|||||||
async def _run_benchmark():
|
async def _run_benchmark():
|
||||||
|
|
||||||
# Load tokenizer for generating test data
|
# Load tokenizer for generating test data
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
|
|
||||||
tokenizer = get_tokenizer(model)
|
tokenizer = get_tokenizer(model)
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import code
|
import code
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ from transformers import (
|
|||||||
AutoProcessor,
|
AutoProcessor,
|
||||||
)
|
)
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
|
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
|
|||||||
@@ -13,8 +13,8 @@ import numpy as np
|
|||||||
import openai
|
import openai
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.runners import TEST_RERANK_QUERY_DOCS
|
from sglang.test.runners import TEST_RERANK_QUERY_DOCS
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_CROSS_ENCODER_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_CROSS_ENCODER_MODEL_NAME_FOR_TEST,
|
||||||
|
|||||||
@@ -16,8 +16,8 @@ import unittest
|
|||||||
import openai
|
import openai
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST,
|
DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
|||||||
@@ -2,8 +2,8 @@ import re
|
|||||||
|
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ import numpy as np
|
|||||||
import openai
|
import openai
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
||||||
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ import unittest
|
|||||||
|
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ import unittest
|
|||||||
|
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ from concurrent.futures import ThreadPoolExecutor
|
|||||||
|
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import openai
|
import openai
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ from torch.distributed.fsdp.api import (
|
|||||||
from transformers import AutoModelForCausalLM
|
from transformers import AutoModelForCausalLM
|
||||||
|
|
||||||
from sglang.srt.entrypoints.verl_engine import VerlEngine
|
from sglang.srt.entrypoints.verl_engine import VerlEngine
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import is_port_available
|
from sglang.srt.utils import is_port_available
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.runners import (
|
from sglang.test.runners import (
|
||||||
HFRunner,
|
HFRunner,
|
||||||
SRTRunner,
|
SRTRunner,
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ from torch.distributed.fsdp.api import (
|
|||||||
from transformers import AutoModelForCausalLM
|
from transformers import AutoModelForCausalLM
|
||||||
|
|
||||||
from sglang.srt.entrypoints.verl_engine import VerlEngine
|
from sglang.srt.entrypoints.verl_engine import VerlEngine
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import is_port_available
|
from sglang.srt.utils import is_port_available
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.runners import (
|
from sglang.test.runners import (
|
||||||
HFRunner,
|
HFRunner,
|
||||||
SRTRunner,
|
SRTRunner,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import unittest
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
||||||
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ import requests
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
||||||
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
||||||
|
|||||||
@@ -2,8 +2,8 @@ import unittest
|
|||||||
|
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
DEFAULT_URL_FOR_TEST,
|
DEFAULT_URL_FOR_TEST,
|
||||||
|
|||||||
@@ -13,13 +13,13 @@ import numpy as np
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.configs.model_config import ModelConfig
|
from sglang.srt.configs.model_config import ModelConfig
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
|
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
|
||||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
||||||
from sglang.srt.model_executor.model_runner import ModelRunner
|
from sglang.srt.model_executor.model_runner import ModelRunner
|
||||||
from sglang.srt.sampling.sampling_params import SamplingParams
|
from sglang.srt.sampling.sampling_params import SamplingParams
|
||||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||||
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
|
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from sglang.srt.function_call.mistral_detector import MistralDetector
|
|||||||
from sglang.srt.function_call.pythonic_detector import PythonicDetector
|
from sglang.srt.function_call.pythonic_detector import PythonicDetector
|
||||||
from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector
|
from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector
|
||||||
from sglang.srt.function_call.qwen25_detector import Qwen25Detector
|
from sglang.srt.function_call.qwen25_detector import Qwen25Detector
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from typing import Dict, List
|
|||||||
import torch
|
import torch
|
||||||
import torch.multiprocessing as mp
|
import torch.multiprocessing as mp
|
||||||
|
|
||||||
from sglang.srt.patch_torch import monkey_patch_torch_reductions
|
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
|
||||||
|
|
||||||
|
|
||||||
class TestReleaseMemoryOccupation(unittest.TestCase):
|
class TestReleaseMemoryOccupation(unittest.TestCase):
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ import unittest
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
|||||||
@@ -13,8 +13,8 @@ import unittest
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import kill_process_tree
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ import torch
|
|||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.bench_offline_throughput import BenchArgs, throughput_test
|
from sglang.bench_offline_throughput import BenchArgs, throughput_test
|
||||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
|
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
|
||||||
from sglang.test.few_shot_gsm8k_engine import run_eval
|
from sglang.test.few_shot_gsm8k_engine import run_eval
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
|
DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST,
|
||||||
|
|||||||
@@ -34,7 +34,9 @@ class TestTokenizerBatchEncode(unittest.TestCase):
|
|||||||
|
|
||||||
with patch("zmq.asyncio.Context"), patch(
|
with patch("zmq.asyncio.Context"), patch(
|
||||||
"sglang.srt.utils.get_zmq_socket"
|
"sglang.srt.utils.get_zmq_socket"
|
||||||
), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer:
|
), patch(
|
||||||
|
"sglang.srt.utils.hf_transformers_utils.get_tokenizer"
|
||||||
|
) as mock_tokenizer:
|
||||||
|
|
||||||
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
||||||
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
||||||
|
|||||||
@@ -31,7 +31,9 @@ class TestInputFormatDetection(unittest.TestCase):
|
|||||||
|
|
||||||
with patch("zmq.asyncio.Context"), patch(
|
with patch("zmq.asyncio.Context"), patch(
|
||||||
"sglang.srt.utils.get_zmq_socket"
|
"sglang.srt.utils.get_zmq_socket"
|
||||||
), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer:
|
), patch(
|
||||||
|
"sglang.srt.utils.hf_transformers_utils.get_tokenizer"
|
||||||
|
) as mock_tokenizer:
|
||||||
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
||||||
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
||||||
|
|
||||||
@@ -125,7 +127,9 @@ class TestTokenizerInputPreparation(unittest.TestCase):
|
|||||||
|
|
||||||
with patch("zmq.asyncio.Context"), patch(
|
with patch("zmq.asyncio.Context"), patch(
|
||||||
"sglang.srt.utils.get_zmq_socket"
|
"sglang.srt.utils.get_zmq_socket"
|
||||||
), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer:
|
), patch(
|
||||||
|
"sglang.srt.utils.hf_transformers_utils.get_tokenizer"
|
||||||
|
) as mock_tokenizer:
|
||||||
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
||||||
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
||||||
|
|
||||||
@@ -177,7 +181,9 @@ class TestTokenizerResultExtraction(unittest.TestCase):
|
|||||||
|
|
||||||
with patch("zmq.asyncio.Context"), patch(
|
with patch("zmq.asyncio.Context"), patch(
|
||||||
"sglang.srt.utils.get_zmq_socket"
|
"sglang.srt.utils.get_zmq_socket"
|
||||||
), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer:
|
), patch(
|
||||||
|
"sglang.srt.utils.hf_transformers_utils.get_tokenizer"
|
||||||
|
) as mock_tokenizer:
|
||||||
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
||||||
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
||||||
|
|
||||||
@@ -279,7 +285,9 @@ class TestTokenizerManagerIntegration(unittest.TestCase):
|
|||||||
|
|
||||||
with patch("zmq.asyncio.Context"), patch(
|
with patch("zmq.asyncio.Context"), patch(
|
||||||
"sglang.srt.utils.get_zmq_socket"
|
"sglang.srt.utils.get_zmq_socket"
|
||||||
), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer:
|
), patch(
|
||||||
|
"sglang.srt.utils.hf_transformers_utils.get_tokenizer"
|
||||||
|
) as mock_tokenizer:
|
||||||
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
mock_tokenizer.return_value = Mock(vocab_size=32000)
|
||||||
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user