Minor: move files to utils folder (#11166)

This commit is contained in:
fzyzcjy
2025-10-03 22:40:06 +08:00
committed by GitHub
parent 04b86b3c5c
commit fdc4e1e570
66 changed files with 91 additions and 79 deletions

View File

@@ -60,7 +60,6 @@ import torch.distributed as dist
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.distributed.parallel_state import destroy_distributed_environment
from sglang.srt.entrypoints.engine import _set_envs_and_config
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.layers.moe import initialize_moe_config
from sglang.srt.managers.schedule_batch import Req, ScheduleBatch
from sglang.srt.managers.scheduler import Scheduler
@@ -78,6 +77,7 @@ from sglang.srt.utils import (
set_gpu_proc_affinity,
suppress_other_loggers,
)
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
@dataclasses.dataclass

View File

@@ -635,7 +635,7 @@ def get_tokenizer(
if pretrained_model_name_or_path.endswith(
".json"
) or pretrained_model_name_or_path.endswith(".model"):
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
return get_tokenizer(pretrained_model_name_or_path)

View File

@@ -433,7 +433,7 @@ class Runtime:
self.endpoint.cache_prefix(prefix)
def get_tokenizer(self):
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
return get_tokenizer(
self.server_args.tokenizer_path,

View File

@@ -23,16 +23,16 @@ import torch
from transformers import PretrainedConfig
from sglang.srt.environ import envs
from sglang.srt.hf_transformers_utils import (
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import is_hip, retry
from sglang.srt.utils.hf_transformers_utils import (
get_config,
get_context_length,
get_generation_config,
get_hf_text_config,
get_sparse_attention_config,
)
from sglang.srt.layers.quantization import QUANTIZATION_METHODS
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import is_hip, retry
from sglang.utils import is_in_ci
logger = logging.getLogger(__name__)

View File

@@ -26,12 +26,12 @@ import torch
from torch import nn
from sglang.srt.configs.load_config import LoadConfig
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.lora.backend.base_backend import BaseLoRABackend
from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend
from sglang.srt.lora.backend.triton_backend import TritonLoRABackend
from sglang.srt.lora.lora_config import LoRAConfig
from sglang.srt.model_loader.loader import DefaultModelLoader
from sglang.srt.utils.hf_transformers_utils import AutoConfig
logger = logging.getLogger(__name__)

View File

@@ -21,7 +21,6 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple
import torch
from sglang.srt.configs.load_config import LoadConfig
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.lora.backend.base_backend import BaseLoRABackend, get_backend_from_name
from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer
from sglang.srt.lora.lora import LoRAAdapter
@@ -39,6 +38,7 @@ from sglang.srt.managers.io_struct import LoRAUpdateOutput
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import replace_submodule
from sglang.srt.utils.hf_transformers_utils import AutoConfig
logger = logging.getLogger(__name__)

View File

@@ -4,7 +4,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
import torch
from sglang.srt.distributed import divide
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.lora.layers import BaseLayerWithLoRA
from sglang.srt.lora.lora import LoRAAdapter
from sglang.srt.lora.lora_config import LoRAConfig
@@ -17,6 +16,7 @@ from sglang.srt.lora.utils import (
get_stacked_multiply,
get_target_module_name,
)
from sglang.srt.utils.hf_transformers_utils import AutoConfig
logger = logging.getLogger(__name__)

View File

@@ -5,7 +5,7 @@ from typing import Iterable, Optional, Set, Tuple
import torch
from sglang.srt.hf_transformers_utils import AutoConfig
from sglang.srt.utils.hf_transformers_utils import AutoConfig
@dataclass

View File

@@ -24,7 +24,6 @@ import psutil
import setproctitle
import zmq
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.managers.io_struct import (
BatchEmbeddingOutput,
BatchMultimodalDecodeReq,
@@ -42,6 +41,7 @@ from sglang.srt.utils import (
get_zmq_socket,
kill_itself_when_parent_died,
)
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.utils import (
TypeBasedDispatcher,
find_printable_text,

View File

@@ -60,11 +60,6 @@ from sglang.srt.disaggregation.utils import (
)
from sglang.srt.distributed import get_pp_group, get_world_group
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
from sglang.srt.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.layers.moe import initialize_moe_config
@@ -190,6 +185,11 @@ from sglang.srt.utils import (
set_random_seed,
suppress_other_loggers,
)
from sglang.srt.utils.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.utils import TypeBasedDispatcher, get_exception_traceback
logger = logging.getLogger(__name__)

View File

@@ -17,7 +17,7 @@ from enum import Enum, auto
from typing import Any, List, Optional
from sglang.srt.managers.io_struct import BlockReqInput, BlockReqType
from sglang.srt.poll_based_barrier import PollBasedBarrier
from sglang.srt.utils.poll_based_barrier import PollBasedBarrier
logger = logging.getLogger(__name__)

View File

@@ -204,7 +204,7 @@ class SchedulerProfilerMixin:
torch.distributed.barrier(self.tp_cpu_group)
if self.tp_rank == 0:
from sglang.srt.rpd_utils import rpd_to_chrome_trace
from sglang.srt.utils.rpd_utils import rpd_to_chrome_trace
rpd_to_chrome_trace("trace.rpd", self.rpd_profile_path)
self.rpd_profiler = None

View File

@@ -43,11 +43,6 @@ from fastapi import BackgroundTasks
from sglang.srt.aio_rwlock import RWLock
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.disaggregation.utils import DisaggregationMode
from sglang.srt.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.lora.lora_registry import LoRARegistry
from sglang.srt.managers.async_dynamic_batch_tokenizer import AsyncDynamicbatchTokenizer
from sglang.srt.managers.disagg_service import start_disagg_service
@@ -99,6 +94,11 @@ from sglang.srt.utils import (
get_zmq_socket,
kill_process_tree,
)
from sglang.srt.utils.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.utils import TypeBasedDispatcher, get_exception_traceback
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

View File

@@ -22,11 +22,6 @@ import torch
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.distributed import get_pp_group, get_world_group
from sglang.srt.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.managers.io_struct import (
DestroyWeightsUpdateGroupReqInput,
@@ -49,9 +44,14 @@ from sglang.srt.model_executor.forward_batch_info import (
PPProxyTensors,
)
from sglang.srt.model_executor.model_runner import ModelRunner
from sglang.srt.patch_torch import monkey_patch_torch_reductions
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import MultiprocessingSerializer, broadcast_pyobj, set_random_seed
from sglang.srt.utils.hf_transformers_utils import (
get_processor,
get_tokenizer,
get_tokenizer_from_processor,
)
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
if TYPE_CHECKING:
from sglang.srt.managers.cache_controller import LayerDoneCounter

View File

@@ -34,7 +34,6 @@ from sglang.srt.model_executor.forward_batch_info import (
ForwardMode,
PPProxyTensors,
)
from sglang.srt.patch_torch import monkey_patch_torch_compile
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
from sglang.srt.utils import (
log_info_on_rank0,
@@ -43,6 +42,7 @@ from sglang.srt.utils import (
require_mlp_sync,
require_mlp_tp_gather,
)
from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
logger = logging.getLogger(__name__)

View File

@@ -48,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import (
PPProxyTensors,
enable_num_token_non_padded,
)
from sglang.srt.patch_torch import monkey_patch_torch_compile
from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin
from sglang.srt.utils import (
empty_context,
@@ -62,6 +61,7 @@ from sglang.srt.utils import (
require_mlp_sync,
require_mlp_tp_gather,
)
from sglang.srt.utils.patch_torch import monkey_patch_torch_compile
_is_hip = is_hip()

View File

@@ -29,7 +29,6 @@ from typing import List, Optional, Tuple, Union
import torch
import torch.distributed as dist
from sglang.srt import slow_rank_detector
from sglang.srt.configs.device_config import DeviceConfig
from sglang.srt.configs.load_config import LoadConfig, LoadFormat
from sglang.srt.configs.model_config import AttentionArch, ModelConfig
@@ -115,7 +114,6 @@ from sglang.srt.offloader import (
get_offloader,
set_offloader,
)
from sglang.srt.patch_torch import monkey_patch_torch_reductions
from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
from sglang.srt.server_args import ServerArgs
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
@@ -140,7 +138,9 @@ from sglang.srt.utils import (
monkey_patch_p2p_access_check,
monkey_patch_vllm_gguf_config,
set_cuda_arch,
slow_rank_detector,
)
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
from sglang.srt.weight_sync.tensor_bucket import (
FlattenedTensorBucket,
FlattenedTensorMetadata,

View File

@@ -9,7 +9,6 @@ import torch.nn as nn
from transformers.activations import ACT2FN
from sglang.srt.configs import DotsOCRConfig
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead
@@ -23,6 +22,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -23,7 +23,6 @@ import torch
from torch import nn
from transformers import Gemma3Config, PreTrainedModel
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.layernorm import Gemma3RMSNorm
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -44,6 +43,7 @@ from sglang.srt.model_loader.weight_utils import (
from sglang.srt.models.gemma3_causal import Gemma3ForCausalLM
from sglang.srt.models.siglip import SiglipVisionModel
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -14,7 +14,6 @@ from transformers import (
)
from transformers.models.auto.modeling_auto import AutoModel
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -38,6 +37,7 @@ from sglang.srt.model_loader.weight_utils import (
from sglang.srt.models.gemma3n_audio import Gemma3nAudioEncoder
from sglang.srt.models.gemma3n_causal import Gemma3nRMSNorm, Gemma3nTextModel
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -7,7 +7,6 @@ import torch.nn as nn
import torch.nn.functional as F
from transformers.models.glm4v.configuration_glm4v import Glm4vConfig, Glm4vVisionConfig
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.layernorm import RMSNorm
@@ -28,6 +27,7 @@ from sglang.srt.models.qwen2_5_vl import (
Qwen2_5_VLForConditionalGeneration,
)
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -10,7 +10,6 @@ from sglang.srt.distributed import (
get_moe_expert_parallel_world_size,
get_tensor_model_parallel_world_size,
)
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.attention import vision_utils
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
@@ -22,6 +21,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.glm4_moe import Glm4MoeModel
from sglang.srt.models.glm4v import Glm4vForConditionalGeneration, Glm4vVisionModel
from sglang.srt.utils import add_prefix, is_cuda, log_info_on_rank0
from sglang.srt.utils.hf_transformers_utils import get_processor
_is_cuda = is_cuda()

View File

@@ -40,7 +40,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
Qwen2_5_VisionRotaryEmbedding,
)
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
@@ -61,6 +60,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2 import Qwen2Model
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -39,7 +39,6 @@ from transformers.models.qwen2_audio.modeling_qwen2_audio import (
Qwen2AudioMultiModalProjector,
)
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.activation import QuickGELU
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
@@ -61,6 +60,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -33,7 +33,6 @@ from einops import rearrange
from transformers import Qwen2VLConfig
from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.activation import QuickGELU
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
@@ -50,6 +49,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch
from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2 import Qwen2Model
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
)
from sglang.srt.configs.qwen3_vl import Qwen3VLConfig, Qwen3VLVisionConfig
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.attention.vision import VisionAttention
from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear
from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -45,6 +44,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader
from sglang.srt.models.qwen2_vl import Qwen2VLVideoInputs
from sglang.srt.models.qwen3 import Qwen3Model
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -34,7 +34,6 @@ from sglang.srt.distributed import (
get_pp_group,
get_tensor_model_parallel_rank,
)
from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.pooler import Pooler, PoolingType
@@ -58,6 +57,7 @@ from sglang.srt.models.qwen3_vl import (
Qwen3VLForConditionalGeneration,
)
from sglang.srt.utils import add_prefix
from sglang.srt.utils.hf_transformers_utils import get_processor
logger = logging.getLogger(__name__)

View File

@@ -24,7 +24,6 @@ from typing import List, Literal, Optional, Union
from sglang.srt.connector import ConnectorType
from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.hf_transformers_utils import check_gguf_file, get_config
from sglang.srt.lora.lora_registry import LoRARef
from sglang.srt.parser.reasoning_parser import ReasoningParser
from sglang.srt.utils import (
@@ -47,6 +46,7 @@ from sglang.srt.utils import (
nullable_str,
parse_connector_type,
)
from sglang.srt.utils.hf_transformers_utils import check_gguf_file, get_config
from sglang.utils import is_in_ci
logger = logging.getLogger(__name__)

View File

@@ -0,0 +1,2 @@
# Temporary shim: re-export everything from .common to avoid changing all imports in the repo
from .common import *

View File

@@ -33,7 +33,7 @@ async def update_weights(
"""
infer_tp_size = device_mesh[device_mesh_key].mesh.size()[0]
infer_tp_rank = device_mesh[device_mesh_key].get_local_rank()
from sglang.srt.patch_torch import monkey_patch_torch_reductions
from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions
monkey_patch_torch_reductions()

View File

@@ -30,8 +30,8 @@ from transformers import (
)
from sglang.srt.entrypoints.engine import Engine
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import load_image
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER, calculate_rouge_l
DEFAULT_PROMPTS = [

View File

@@ -551,7 +551,7 @@ def test_gen_min_new_tokens():
We verify that the number of tokens in the answer is >= the min_tokens threshold.
"""
import sglang as sgl
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
model_path = sgl.global_config.default_backend.endpoint.get_model_name()
MIN_TOKENS, MAX_TOKENS = 64, 128

View File

@@ -921,7 +921,7 @@ def run_score_benchmark(
async def _run_benchmark():
# Load tokenizer for generating test data
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
tokenizer = get_tokenizer(model)