From fdc4e1e570503ddfc248aaa9ce6526c1eec92da2 Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Fri, 3 Oct 2025 22:40:06 +0800
Subject: [PATCH] Tiny move files to utils folder (#11166)

---
 benchmark/json_schema/bench_sglang.py | 2 +-
 .../multi_turn_chat/long_prompt_multi_turn.py | 2 +-
 .../token_in_token_out_llm_engine.py | 2 +-
 .../token_in_token_out_llm_server.py | 2 +-
 python/sglang/bench_one_batch.py | 2 +-
 python/sglang/bench_serving.py | 2 +-
 python/sglang/lang/backend/runtime_endpoint.py | 2 +-
 python/sglang/srt/configs/model_config.py | 8 ++++----
 python/sglang/srt/lora/lora.py | 2 +-
 python/sglang/srt/lora/lora_manager.py | 2 +-
 python/sglang/srt/lora/mem_pool.py | 2 +-
 python/sglang/srt/lora/utils.py | 2 +-
 .../sglang/srt/managers/detokenizer_manager.py | 2 +-
 python/sglang/srt/managers/scheduler.py | 10 +++++-----
 .../srt/managers/scheduler_input_blocker.py | 2 +-
 .../srt/managers/scheduler_profiler_mixin.py | 2 +-
 python/sglang/srt/managers/tokenizer_manager.py | 10 +++++-----
 python/sglang/srt/managers/tp_worker.py | 12 ++++++------
 .../srt/model_executor/cpu_graph_runner.py | 2 +-
 .../srt/model_executor/cuda_graph_runner.py | 2 +-
 python/sglang/srt/model_executor/model_runner.py | 4 ++--
 python/sglang/srt/models/dots_ocr.py | 2 +-
 python/sglang/srt/models/gemma3_mm.py | 2 +-
 python/sglang/srt/models/gemma3n_mm.py | 2 +-
 python/sglang/srt/models/glm4v.py | 2 +-
 python/sglang/srt/models/glm4v_moe.py | 2 +-
 python/sglang/srt/models/qwen2_5_vl.py | 2 +-
 python/sglang/srt/models/qwen2_audio.py | 2 +-
 python/sglang/srt/models/qwen2_vl.py | 2 +-
 python/sglang/srt/models/qwen3_vl.py | 2 +-
 python/sglang/srt/models/qwen3_vl_moe.py | 2 +-
 python/sglang/srt/server_args.py | 2 +-
 python/sglang/srt/utils/__init__.py | 2 ++
 python/sglang/srt/{utils.py => utils/common.py} | 0
 .../srt/{ => utils}/hf_transformers_utils.py | 0
 python/sglang/srt/{ => utils}/patch_torch.py | 0
 .../sglang/srt/{ => utils}/poll_based_barrier.py | 0
 python/sglang/srt/{ => utils}/rpd_utils.py | 0
 .../sglang/srt/{ => utils}/slow_rank_detector.py | 0
 python/sglang/srt/weight_sync/utils.py | 2 +-
 python/sglang/test/runners.py | 2 +-
 python/sglang/test/test_programs.py | 2 +-
 python/sglang/test/test_utils.py | 2 +-
 scripts/playground/load_tokenizer.py | 2 +-
 scripts/playground/reference_hf.py | 2 +-
 .../openai_server/basic/test_openai_server.py | 2 +-
 .../features/test_enable_thinking.py | 2 +-
 .../features/test_openai_server_ebnf.py | 2 +-
 .../features/test_openai_server_hidden_states.py | 2 +-
 .../test_openai_function_calling.py | 2 +-
 .../function_call/test_tool_choice.py | 2 +-
 .../validation/test_large_max_new_tokens.py | 2 +-
 .../validation/test_openai_server_ignore_eos.py | 2 +-
 test/srt/rl/test_verl_engine_2_gpu.py | 2 +-
 test/srt/rl/test_verl_engine_4_gpu.py | 2 +-
 test/srt/test_bench_serving.py | 2 +-
 test/srt/test_eagle_infer_a.py | 2 +-
 test/srt/test_fim_completion.py | 2 +-
 test/srt/test_forward_split_prefill.py | 2 +-
 test/srt/test_function_call_parser.py | 2 +-
 test/srt/test_patch_torch.py | 2 +-
 test/srt/test_sagemaker_server.py | 2 +-
 test/srt/test_session_control.py | 2 +-
 test/srt/test_srt_engine.py | 2 +-
 test/srt/test_tokenizer_batch_encode.py | 4 +++-
 test/srt/test_tokenizer_manager.py | 16 ++++++++++++----
 66 files changed, 91 insertions(+), 79 deletions(-)
 create mode 100644 python/sglang/srt/utils/__init__.py
 rename python/sglang/srt/{utils.py => utils/common.py} (100%)
 rename python/sglang/srt/{ => utils}/hf_transformers_utils.py (100%) rename
python/sglang/srt/{ => utils}/patch_torch.py (100%) rename python/sglang/srt/{ => utils}/poll_based_barrier.py (100%) rename python/sglang/srt/{ => utils}/rpd_utils.py (100%) rename python/sglang/srt/{ => utils}/slow_rank_detector.py (100%) diff --git a/benchmark/json_schema/bench_sglang.py b/benchmark/json_schema/bench_sglang.py index 55365ff2e..8de68df34 100644 --- a/benchmark/json_schema/bench_sglang.py +++ b/benchmark/json_schema/bench_sglang.py @@ -8,7 +8,7 @@ from datasets import load_dataset import sglang as sgl from sglang.global_config import global_config -from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( add_common_sglang_args_and_parse, select_sglang_backend, diff --git a/benchmark/multi_turn_chat/long_prompt_multi_turn.py b/benchmark/multi_turn_chat/long_prompt_multi_turn.py index bda5bb9cc..88eba70cd 100644 --- a/benchmark/multi_turn_chat/long_prompt_multi_turn.py +++ b/benchmark/multi_turn_chat/long_prompt_multi_turn.py @@ -7,7 +7,7 @@ from pathlib import Path from tqdm import tqdm import sglang as sgl -from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( add_common_sglang_args_and_parse, select_sglang_backend, diff --git a/examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py b/examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py index cb1b7ddc1..11453f931 100644 --- a/examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py +++ b/examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py @@ -3,7 +3,7 @@ This example demonstrates how to provide tokenized ids to LLM as input instead o """ import sglang as sgl -from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils.hf_transformers_utils import get_tokenizer MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct" diff --git a/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py b/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py index 00c0988b2..7e498f513 100644 --- a/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py +++ b/examples/runtime/token_in_token_out/token_in_token_out_llm_server.py @@ -7,7 +7,7 @@ python token_in_token_out_llm_server.py import requests -from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import is_in_ci from sglang.utils import terminate_process, wait_for_server diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py index 92f6e20d1..9def9d8d0 100644 --- a/python/sglang/bench_one_batch.py +++ b/python/sglang/bench_one_batch.py @@ -60,7 +60,6 @@ import torch.distributed as dist from sglang.srt.configs.model_config import ModelConfig from sglang.srt.distributed.parallel_state import destroy_distributed_environment from sglang.srt.entrypoints.engine import _set_envs_and_config -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.layers.moe import initialize_moe_config from sglang.srt.managers.schedule_batch import Req, ScheduleBatch from sglang.srt.managers.scheduler import Scheduler @@ -78,6 +77,7 @@ from sglang.srt.utils import ( set_gpu_proc_affinity, suppress_other_loggers, ) +from sglang.srt.utils.hf_transformers_utils import get_tokenizer @dataclasses.dataclass diff --git a/python/sglang/bench_serving.py 
b/python/sglang/bench_serving.py index 3f515a1e9..b7b969999 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -635,7 +635,7 @@ def get_tokenizer( if pretrained_model_name_or_path.endswith( ".json" ) or pretrained_model_name_or_path.endswith(".model"): - from sglang.srt.hf_transformers_utils import get_tokenizer + from sglang.srt.utils.hf_transformers_utils import get_tokenizer return get_tokenizer(pretrained_model_name_or_path) diff --git a/python/sglang/lang/backend/runtime_endpoint.py b/python/sglang/lang/backend/runtime_endpoint.py index 349f9934a..1573ca68d 100644 --- a/python/sglang/lang/backend/runtime_endpoint.py +++ b/python/sglang/lang/backend/runtime_endpoint.py @@ -433,7 +433,7 @@ class Runtime: self.endpoint.cache_prefix(prefix) def get_tokenizer(self): - from sglang.srt.hf_transformers_utils import get_tokenizer + from sglang.srt.utils.hf_transformers_utils import get_tokenizer return get_tokenizer( self.server_args.tokenizer_path, diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py index f03573aac..38bfdc00f 100644 --- a/python/sglang/srt/configs/model_config.py +++ b/python/sglang/srt/configs/model_config.py @@ -23,16 +23,16 @@ import torch from transformers import PretrainedConfig from sglang.srt.environ import envs -from sglang.srt.hf_transformers_utils import ( +from sglang.srt.layers.quantization import QUANTIZATION_METHODS +from sglang.srt.server_args import ServerArgs +from sglang.srt.utils import is_hip, retry +from sglang.srt.utils.hf_transformers_utils import ( get_config, get_context_length, get_generation_config, get_hf_text_config, get_sparse_attention_config, ) -from sglang.srt.layers.quantization import QUANTIZATION_METHODS -from sglang.srt.server_args import ServerArgs -from sglang.srt.utils import is_hip, retry from sglang.utils import is_in_ci logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/lora/lora.py b/python/sglang/srt/lora/lora.py index 08d4c296f..b1277caca 100644 --- a/python/sglang/srt/lora/lora.py +++ b/python/sglang/srt/lora/lora.py @@ -26,12 +26,12 @@ import torch from torch import nn from sglang.srt.configs.load_config import LoadConfig -from sglang.srt.hf_transformers_utils import AutoConfig from sglang.srt.lora.backend.base_backend import BaseLoRABackend from sglang.srt.lora.backend.chunked_backend import ChunkedSgmvLoRABackend from sglang.srt.lora.backend.triton_backend import TritonLoRABackend from sglang.srt.lora.lora_config import LoRAConfig from sglang.srt.model_loader.loader import DefaultModelLoader +from sglang.srt.utils.hf_transformers_utils import AutoConfig logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/lora/lora_manager.py b/python/sglang/srt/lora/lora_manager.py index 1af4bea4f..2b90a8741 100644 --- a/python/sglang/srt/lora/lora_manager.py +++ b/python/sglang/srt/lora/lora_manager.py @@ -21,7 +21,6 @@ from typing import Dict, Iterable, List, Optional, Set, Tuple import torch from sglang.srt.configs.load_config import LoadConfig -from sglang.srt.hf_transformers_utils import AutoConfig from sglang.srt.lora.backend.base_backend import BaseLoRABackend, get_backend_from_name from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer from sglang.srt.lora.lora import LoRAAdapter @@ -39,6 +38,7 @@ from sglang.srt.managers.io_struct import LoRAUpdateOutput from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.server_args import ServerArgs from sglang.srt.utils import 
replace_submodule +from sglang.srt.utils.hf_transformers_utils import AutoConfig logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/lora/mem_pool.py b/python/sglang/srt/lora/mem_pool.py index cdf1707e8..107f9f508 100644 --- a/python/sglang/srt/lora/mem_pool.py +++ b/python/sglang/srt/lora/mem_pool.py @@ -4,7 +4,6 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union import torch from sglang.srt.distributed import divide -from sglang.srt.hf_transformers_utils import AutoConfig from sglang.srt.lora.layers import BaseLayerWithLoRA from sglang.srt.lora.lora import LoRAAdapter from sglang.srt.lora.lora_config import LoRAConfig @@ -17,6 +16,7 @@ from sglang.srt.lora.utils import ( get_stacked_multiply, get_target_module_name, ) +from sglang.srt.utils.hf_transformers_utils import AutoConfig logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/lora/utils.py b/python/sglang/srt/lora/utils.py index 83c8f1e89..7037fc4a6 100644 --- a/python/sglang/srt/lora/utils.py +++ b/python/sglang/srt/lora/utils.py @@ -5,7 +5,7 @@ from typing import Iterable, Optional, Set, Tuple import torch -from sglang.srt.hf_transformers_utils import AutoConfig +from sglang.srt.utils.hf_transformers_utils import AutoConfig @dataclass diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py index 0169bd99a..68132991c 100644 --- a/python/sglang/srt/managers/detokenizer_manager.py +++ b/python/sglang/srt/managers/detokenizer_manager.py @@ -24,7 +24,6 @@ import psutil import setproctitle import zmq -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.managers.io_struct import ( BatchEmbeddingOutput, BatchMultimodalDecodeReq, @@ -42,6 +41,7 @@ from sglang.srt.utils import ( get_zmq_socket, kill_itself_when_parent_died, ) +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.utils import ( TypeBasedDispatcher, find_printable_text, diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 2450cd46a..5a08f810d 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -60,11 +60,6 @@ from sglang.srt.disaggregation.utils import ( ) from sglang.srt.distributed import get_pp_group, get_world_group from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder -from sglang.srt.hf_transformers_utils import ( - get_processor, - get_tokenizer, - get_tokenizer_from_processor, -) from sglang.srt.layers.dp_attention import compute_dp_attention_world_info from sglang.srt.layers.logits_processor import LogitsProcessorOutput from sglang.srt.layers.moe import initialize_moe_config @@ -190,6 +185,11 @@ from sglang.srt.utils import ( set_random_seed, suppress_other_loggers, ) +from sglang.srt.utils.hf_transformers_utils import ( + get_processor, + get_tokenizer, + get_tokenizer_from_processor, +) from sglang.utils import TypeBasedDispatcher, get_exception_traceback logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/managers/scheduler_input_blocker.py b/python/sglang/srt/managers/scheduler_input_blocker.py index 60ae8d5d6..b6838ae43 100644 --- a/python/sglang/srt/managers/scheduler_input_blocker.py +++ b/python/sglang/srt/managers/scheduler_input_blocker.py @@ -17,7 +17,7 @@ from enum import Enum, auto from typing import Any, List, Optional from sglang.srt.managers.io_struct import BlockReqInput, BlockReqType -from sglang.srt.poll_based_barrier import 
PollBasedBarrier +from sglang.srt.utils.poll_based_barrier import PollBasedBarrier logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/managers/scheduler_profiler_mixin.py b/python/sglang/srt/managers/scheduler_profiler_mixin.py index a71214ac0..21e47f8c4 100644 --- a/python/sglang/srt/managers/scheduler_profiler_mixin.py +++ b/python/sglang/srt/managers/scheduler_profiler_mixin.py @@ -204,7 +204,7 @@ class SchedulerProfilerMixin: torch.distributed.barrier(self.tp_cpu_group) if self.tp_rank == 0: - from sglang.srt.rpd_utils import rpd_to_chrome_trace + from sglang.srt.utils.rpd_utils import rpd_to_chrome_trace rpd_to_chrome_trace("trace.rpd", self.rpd_profile_path) self.rpd_profiler = None diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index a003e8ae4..e9403a671 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -43,11 +43,6 @@ from fastapi import BackgroundTasks from sglang.srt.aio_rwlock import RWLock from sglang.srt.configs.model_config import ModelConfig from sglang.srt.disaggregation.utils import DisaggregationMode -from sglang.srt.hf_transformers_utils import ( - get_processor, - get_tokenizer, - get_tokenizer_from_processor, -) from sglang.srt.lora.lora_registry import LoRARegistry from sglang.srt.managers.async_dynamic_batch_tokenizer import AsyncDynamicbatchTokenizer from sglang.srt.managers.disagg_service import start_disagg_service @@ -99,6 +94,11 @@ from sglang.srt.utils import ( get_zmq_socket, kill_process_tree, ) +from sglang.srt.utils.hf_transformers_utils import ( + get_processor, + get_tokenizer, + get_tokenizer_from_processor, +) from sglang.utils import TypeBasedDispatcher, get_exception_traceback asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index a4e087e5b..6a3ef3b96 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -22,11 +22,6 @@ import torch from sglang.srt.configs.model_config import ModelConfig from sglang.srt.distributed import get_pp_group, get_world_group -from sglang.srt.hf_transformers_utils import ( - get_processor, - get_tokenizer, - get_tokenizer_from_processor, -) from sglang.srt.layers.logits_processor import LogitsProcessorOutput from sglang.srt.managers.io_struct import ( DestroyWeightsUpdateGroupReqInput, @@ -49,9 +44,14 @@ from sglang.srt.model_executor.forward_batch_info import ( PPProxyTensors, ) from sglang.srt.model_executor.model_runner import ModelRunner -from sglang.srt.patch_torch import monkey_patch_torch_reductions from sglang.srt.server_args import ServerArgs from sglang.srt.utils import MultiprocessingSerializer, broadcast_pyobj, set_random_seed +from sglang.srt.utils.hf_transformers_utils import ( + get_processor, + get_tokenizer, + get_tokenizer_from_processor, +) +from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions if TYPE_CHECKING: from sglang.srt.managers.cache_controller import LayerDoneCounter diff --git a/python/sglang/srt/model_executor/cpu_graph_runner.py b/python/sglang/srt/model_executor/cpu_graph_runner.py index f1f7aa7b0..9eda46722 100644 --- a/python/sglang/srt/model_executor/cpu_graph_runner.py +++ b/python/sglang/srt/model_executor/cpu_graph_runner.py @@ -34,7 +34,6 @@ from sglang.srt.model_executor.forward_batch_info import ( ForwardMode, PPProxyTensors, ) -from sglang.srt.patch_torch import 
monkey_patch_torch_compile from sglang.srt.speculative.spec_info import SpeculativeAlgorithm from sglang.srt.utils import ( log_info_on_rank0, @@ -43,6 +42,7 @@ from sglang.srt.utils import ( require_mlp_sync, require_mlp_tp_gather, ) +from sglang.srt.utils.patch_torch import monkey_patch_torch_compile logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index 81f0ce9e4..3ad2f450c 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -48,7 +48,6 @@ from sglang.srt.model_executor.forward_batch_info import ( PPProxyTensors, enable_num_token_non_padded, ) -from sglang.srt.patch_torch import monkey_patch_torch_compile from sglang.srt.two_batch_overlap import TboCudaGraphRunnerPlugin from sglang.srt.utils import ( empty_context, @@ -62,6 +61,7 @@ from sglang.srt.utils import ( require_mlp_sync, require_mlp_tp_gather, ) +from sglang.srt.utils.patch_torch import monkey_patch_torch_compile _is_hip = is_hip() diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index a73a6be3b..eb02c41e3 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -29,7 +29,6 @@ from typing import List, Optional, Tuple, Union import torch import torch.distributed as dist -from sglang.srt import slow_rank_detector from sglang.srt.configs.device_config import DeviceConfig from sglang.srt.configs.load_config import LoadConfig, LoadFormat from sglang.srt.configs.model_config import AttentionArch, ModelConfig @@ -115,7 +114,6 @@ from sglang.srt.offloader import ( get_offloader, set_offloader, ) -from sglang.srt.patch_torch import monkey_patch_torch_reductions from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo from sglang.srt.server_args import ServerArgs from sglang.srt.speculative.spec_info import SpeculativeAlgorithm @@ -140,7 +138,9 @@ from sglang.srt.utils import ( monkey_patch_p2p_access_check, monkey_patch_vllm_gguf_config, set_cuda_arch, + slow_rank_detector, ) +from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions from sglang.srt.weight_sync.tensor_bucket import ( FlattenedTensorBucket, FlattenedTensorMetadata, diff --git a/python/sglang/srt/models/dots_ocr.py b/python/sglang/srt/models/dots_ocr.py index b0202367b..ee48909ed 100644 --- a/python/sglang/srt/models/dots_ocr.py +++ b/python/sglang/srt/models/dots_ocr.py @@ -9,7 +9,6 @@ import torch.nn as nn from transformers.activations import ACT2FN from sglang.srt.configs import DotsOCRConfig -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead @@ -23,6 +22,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer from sglang.srt.models.qwen2 import Qwen2ForCausalLM from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/gemma3_mm.py b/python/sglang/srt/models/gemma3_mm.py index 527a11b69..8060fdee9 100644 --- a/python/sglang/srt/models/gemma3_mm.py +++ b/python/sglang/srt/models/gemma3_mm.py @@ -23,7 +23,6 @@ import 
torch from torch import nn from transformers import Gemma3Config, PreTrainedModel -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.layernorm import Gemma3RMSNorm from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.quantization.base_config import QuantizationConfig @@ -44,6 +43,7 @@ from sglang.srt.model_loader.weight_utils import ( from sglang.srt.models.gemma3_causal import Gemma3ForCausalLM from sglang.srt.models.siglip import SiglipVisionModel from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/gemma3n_mm.py b/python/sglang/srt/models/gemma3n_mm.py index 995db2602..3c52635dd 100644 --- a/python/sglang/srt/models/gemma3n_mm.py +++ b/python/sglang/srt/models/gemma3n_mm.py @@ -14,7 +14,6 @@ from transformers import ( ) from transformers.models.auto.modeling_auto import AutoModel -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear from sglang.srt.layers.logits_processor import LogitsProcessor @@ -38,6 +37,7 @@ from sglang.srt.model_loader.weight_utils import ( from sglang.srt.models.gemma3n_audio import Gemma3nAudioEncoder from sglang.srt.models.gemma3n_causal import Gemma3nRMSNorm, Gemma3nTextModel from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/glm4v.py b/python/sglang/srt/models/glm4v.py index 63c955a72..0aab90a6a 100644 --- a/python/sglang/srt/models/glm4v.py +++ b/python/sglang/srt/models/glm4v.py @@ -7,7 +7,6 @@ import torch.nn as nn import torch.nn.functional as F from transformers.models.glm4v.configuration_glm4v import Glm4vConfig, Glm4vVisionConfig -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.activation import SiluAndMul from sglang.srt.layers.attention import vision_utils from sglang.srt.layers.layernorm import RMSNorm @@ -28,6 +27,7 @@ from sglang.srt.models.qwen2_5_vl import ( Qwen2_5_VLForConditionalGeneration, ) from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/glm4v_moe.py b/python/sglang/srt/models/glm4v_moe.py index 2a17e04b0..fb3d26f11 100644 --- a/python/sglang/srt/models/glm4v_moe.py +++ b/python/sglang/srt/models/glm4v_moe.py @@ -10,7 +10,6 @@ from sglang.srt.distributed import ( get_moe_expert_parallel_world_size, get_tensor_model_parallel_world_size, ) -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.attention import vision_utils from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.moe.fused_moe_triton import FusedMoE @@ -22,6 +21,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.glm4_moe import Glm4MoeModel from sglang.srt.models.glm4v import Glm4vForConditionalGeneration, Glm4vVisionModel from sglang.srt.utils import add_prefix, is_cuda, log_info_on_rank0 +from sglang.srt.utils.hf_transformers_utils import get_processor _is_cuda = is_cuda() diff --git a/python/sglang/srt/models/qwen2_5_vl.py b/python/sglang/srt/models/qwen2_5_vl.py index 6c70629c2..73d08d42d 100644 --- a/python/sglang/srt/models/qwen2_5_vl.py +++ 
b/python/sglang/srt/models/qwen2_5_vl.py @@ -40,7 +40,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import ( Qwen2_5_VisionRotaryEmbedding, ) -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.attention.vision import VisionAttention from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.linear import ( @@ -61,6 +60,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.qwen2 import Qwen2Model from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/qwen2_audio.py b/python/sglang/srt/models/qwen2_audio.py index 180ee801b..8609758a9 100644 --- a/python/sglang/srt/models/qwen2_audio.py +++ b/python/sglang/srt/models/qwen2_audio.py @@ -39,7 +39,6 @@ from transformers.models.qwen2_audio.modeling_qwen2_audio import ( Qwen2AudioMultiModalProjector, ) -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.activation import QuickGELU from sglang.srt.layers.attention.vision import VisionAttention from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear @@ -61,6 +60,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.qwen2 import Qwen2ForCausalLM from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/qwen2_vl.py b/python/sglang/srt/models/qwen2_vl.py index 55f325813..a44e94a07 100644 --- a/python/sglang/srt/models/qwen2_vl.py +++ b/python/sglang/srt/models/qwen2_vl.py @@ -33,7 +33,6 @@ from einops import rearrange from transformers import Qwen2VLConfig from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.activation import QuickGELU from sglang.srt.layers.attention.vision import VisionAttention from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear @@ -50,6 +49,7 @@ from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.qwen2 import Qwen2Model from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/qwen3_vl.py b/python/sglang/srt/models/qwen3_vl.py index a87d21e78..d37d06e73 100644 --- a/python/sglang/srt/models/qwen3_vl.py +++ b/python/sglang/srt/models/qwen3_vl.py @@ -28,7 +28,6 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import ( ) from sglang.srt.configs.qwen3_vl import Qwen3VLConfig, Qwen3VLVisionConfig -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.attention.vision import VisionAttention from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear from sglang.srt.layers.logits_processor import LogitsProcessor @@ -45,6 +44,7 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.qwen2_vl import Qwen2VLVideoInputs from sglang.srt.models.qwen3 import Qwen3Model from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils 
import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/qwen3_vl_moe.py b/python/sglang/srt/models/qwen3_vl_moe.py index a88059916..08c1d1758 100644 --- a/python/sglang/srt/models/qwen3_vl_moe.py +++ b/python/sglang/srt/models/qwen3_vl_moe.py @@ -34,7 +34,6 @@ from sglang.srt.distributed import ( get_pp_group, get_tensor_model_parallel_rank, ) -from sglang.srt.hf_transformers_utils import get_processor from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE from sglang.srt.layers.pooler import Pooler, PoolingType @@ -58,6 +57,7 @@ from sglang.srt.models.qwen3_vl import ( Qwen3VLForConditionalGeneration, ) from sglang.srt.utils import add_prefix +from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index f9ff382a1..5c7e2f57b 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -24,7 +24,6 @@ from typing import List, Literal, Optional, Union from sglang.srt.connector import ConnectorType from sglang.srt.function_call.function_call_parser import FunctionCallParser -from sglang.srt.hf_transformers_utils import check_gguf_file, get_config from sglang.srt.lora.lora_registry import LoRARef from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.utils import ( @@ -47,6 +46,7 @@ from sglang.srt.utils import ( nullable_str, parse_connector_type, ) +from sglang.srt.utils.hf_transformers_utils import check_gguf_file, get_config from sglang.utils import is_in_ci logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/utils/__init__.py b/python/sglang/srt/utils/__init__.py new file mode 100644 index 000000000..5fb724e1a --- /dev/null +++ b/python/sglang/srt/utils/__init__.py @@ -0,0 +1,2 @@ +# Temporarily do this to avoid changing all imports in the repo +from .common import * diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils/common.py similarity index 100% rename from python/sglang/srt/utils.py rename to python/sglang/srt/utils/common.py diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/utils/hf_transformers_utils.py similarity index 100% rename from python/sglang/srt/hf_transformers_utils.py rename to python/sglang/srt/utils/hf_transformers_utils.py diff --git a/python/sglang/srt/patch_torch.py b/python/sglang/srt/utils/patch_torch.py similarity index 100% rename from python/sglang/srt/patch_torch.py rename to python/sglang/srt/utils/patch_torch.py diff --git a/python/sglang/srt/poll_based_barrier.py b/python/sglang/srt/utils/poll_based_barrier.py similarity index 100% rename from python/sglang/srt/poll_based_barrier.py rename to python/sglang/srt/utils/poll_based_barrier.py diff --git a/python/sglang/srt/rpd_utils.py b/python/sglang/srt/utils/rpd_utils.py similarity index 100% rename from python/sglang/srt/rpd_utils.py rename to python/sglang/srt/utils/rpd_utils.py diff --git a/python/sglang/srt/slow_rank_detector.py b/python/sglang/srt/utils/slow_rank_detector.py similarity index 100% rename from python/sglang/srt/slow_rank_detector.py rename to python/sglang/srt/utils/slow_rank_detector.py diff --git a/python/sglang/srt/weight_sync/utils.py b/python/sglang/srt/weight_sync/utils.py index f308207e2..97ed4ae50 100644 --- a/python/sglang/srt/weight_sync/utils.py +++ b/python/sglang/srt/weight_sync/utils.py @@ -33,7 +33,7 @@ async def update_weights( """ 
infer_tp_size = device_mesh[device_mesh_key].mesh.size()[0] infer_tp_rank = device_mesh[device_mesh_key].get_local_rank() - from sglang.srt.patch_torch import monkey_patch_torch_reductions + from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions monkey_patch_torch_reductions() diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 8ce2e2e20..9e64457fc 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -30,8 +30,8 @@ from transformers import ( ) from sglang.srt.entrypoints.engine import Engine -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import load_image +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER, calculate_rouge_l DEFAULT_PROMPTS = [ diff --git a/python/sglang/test/test_programs.py b/python/sglang/test/test_programs.py index 6756f2dd7..dcd3f4131 100644 --- a/python/sglang/test/test_programs.py +++ b/python/sglang/test/test_programs.py @@ -551,7 +551,7 @@ def test_gen_min_new_tokens(): We verify that the number of tokens in the answer is >= the min_tokens threshold. """ import sglang as sgl - from sglang.srt.hf_transformers_utils import get_tokenizer + from sglang.srt.utils.hf_transformers_utils import get_tokenizer model_path = sgl.global_config.default_backend.endpoint.get_model_name() MIN_TOKENS, MAX_TOKENS = 64, 128 diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py index 060a41c13..bfe515c15 100644 --- a/python/sglang/test/test_utils.py +++ b/python/sglang/test/test_utils.py @@ -921,7 +921,7 @@ def run_score_benchmark( async def _run_benchmark(): # Load tokenizer for generating test data - from sglang.srt.hf_transformers_utils import get_tokenizer + from sglang.srt.utils.hf_transformers_utils import get_tokenizer tokenizer = get_tokenizer(model) diff --git a/scripts/playground/load_tokenizer.py b/scripts/playground/load_tokenizer.py index 94cf34bc7..6fccc2566 100644 --- a/scripts/playground/load_tokenizer.py +++ b/scripts/playground/load_tokenizer.py @@ -1,7 +1,7 @@ import argparse import code -from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils.hf_transformers_utils import get_tokenizer if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/scripts/playground/reference_hf.py b/scripts/playground/reference_hf.py index 14d23fb76..538c31f77 100644 --- a/scripts/playground/reference_hf.py +++ b/scripts/playground/reference_hf.py @@ -38,7 +38,7 @@ from transformers import ( AutoProcessor, ) -from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils.hf_transformers_utils import get_tokenizer @torch.no_grad() diff --git a/test/srt/openai_server/basic/test_openai_server.py b/test/srt/openai_server/basic/test_openai_server.py index f42039bff..8fed9051e 100644 --- a/test/srt/openai_server/basic/test_openai_server.py +++ b/test/srt/openai_server/basic/test_openai_server.py @@ -13,8 +13,8 @@ import numpy as np import openai import requests -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.runners import TEST_RERANK_QUERY_DOCS from sglang.test.test_utils import ( DEFAULT_SMALL_CROSS_ENCODER_MODEL_NAME_FOR_TEST, diff --git a/test/srt/openai_server/features/test_enable_thinking.py b/test/srt/openai_server/features/test_enable_thinking.py index 
00ba4fc94..5e03d17de 100644 --- a/test/srt/openai_server/features/test_enable_thinking.py +++ b/test/srt/openai_server/features/test_enable_thinking.py @@ -16,8 +16,8 @@ import unittest import openai import requests -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, diff --git a/test/srt/openai_server/features/test_openai_server_ebnf.py b/test/srt/openai_server/features/test_openai_server_ebnf.py index 126556ed7..0104d398d 100644 --- a/test/srt/openai_server/features/test_openai_server_ebnf.py +++ b/test/srt/openai_server/features/test_openai_server_ebnf.py @@ -2,8 +2,8 @@ import re import openai -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, diff --git a/test/srt/openai_server/features/test_openai_server_hidden_states.py b/test/srt/openai_server/features/test_openai_server_hidden_states.py index 34e5ddde7..bb066e691 100644 --- a/test/srt/openai_server/features/test_openai_server_hidden_states.py +++ b/test/srt/openai_server/features/test_openai_server_hidden_states.py @@ -8,8 +8,8 @@ import numpy as np import openai import torch -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST, DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST, diff --git a/test/srt/openai_server/function_call/test_openai_function_calling.py b/test/srt/openai_server/function_call/test_openai_function_calling.py index 1bb95693f..fe5a49728 100644 --- a/test/srt/openai_server/function_call/test_openai_function_calling.py +++ b/test/srt/openai_server/function_call/test_openai_function_calling.py @@ -4,8 +4,8 @@ import unittest import openai -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, diff --git a/test/srt/openai_server/function_call/test_tool_choice.py b/test/srt/openai_server/function_call/test_tool_choice.py index 782641e51..f324f66e6 100644 --- a/test/srt/openai_server/function_call/test_tool_choice.py +++ b/test/srt/openai_server/function_call/test_tool_choice.py @@ -12,8 +12,8 @@ import unittest import openai -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, diff --git a/test/srt/openai_server/validation/test_large_max_new_tokens.py b/test/srt/openai_server/validation/test_large_max_new_tokens.py index 49601a784..e1e2aa8f9 100644 --- a/test/srt/openai_server/validation/test_large_max_new_tokens.py +++ b/test/srt/openai_server/validation/test_large_max_new_tokens.py @@ -9,8 +9,8 @@ from concurrent.futures import ThreadPoolExecutor import openai -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import 
kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, diff --git a/test/srt/openai_server/validation/test_openai_server_ignore_eos.py b/test/srt/openai_server/validation/test_openai_server_ignore_eos.py index a3594dfd0..7c69011f8 100644 --- a/test/srt/openai_server/validation/test_openai_server_ignore_eos.py +++ b/test/srt/openai_server/validation/test_openai_server_ignore_eos.py @@ -1,7 +1,7 @@ import openai -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, diff --git a/test/srt/rl/test_verl_engine_2_gpu.py b/test/srt/rl/test_verl_engine_2_gpu.py index 40321ee3f..39b2e6887 100644 --- a/test/srt/rl/test_verl_engine_2_gpu.py +++ b/test/srt/rl/test_verl_engine_2_gpu.py @@ -19,8 +19,8 @@ from torch.distributed.fsdp.api import ( from transformers import AutoModelForCausalLM from sglang.srt.entrypoints.verl_engine import VerlEngine -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import is_port_available +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.runners import ( HFRunner, SRTRunner, diff --git a/test/srt/rl/test_verl_engine_4_gpu.py b/test/srt/rl/test_verl_engine_4_gpu.py index 014f17daf..fb137cab4 100644 --- a/test/srt/rl/test_verl_engine_4_gpu.py +++ b/test/srt/rl/test_verl_engine_4_gpu.py @@ -19,8 +19,8 @@ from torch.distributed.fsdp.api import ( from transformers import AutoModelForCausalLM from sglang.srt.entrypoints.verl_engine import VerlEngine -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import is_port_available +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.runners import ( HFRunner, SRTRunner, diff --git a/test/srt/test_bench_serving.py b/test/srt/test_bench_serving.py index 6a73566e1..978033e3b 100644 --- a/test/srt/test_bench_serving.py +++ b/test/srt/test_bench_serving.py @@ -4,7 +4,7 @@ import unittest import requests -from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST, DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST, diff --git a/test/srt/test_eagle_infer_a.py b/test/srt/test_eagle_infer_a.py index f956059c0..eb6813a0d 100644 --- a/test/srt/test_eagle_infer_a.py +++ b/test/srt/test_eagle_infer_a.py @@ -4,8 +4,8 @@ import requests import torch import sglang as sgl -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST, DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST, diff --git a/test/srt/test_fim_completion.py b/test/srt/test_fim_completion.py index 09db1d4bc..6efdfe776 100644 --- a/test/srt/test_fim_completion.py +++ b/test/srt/test_fim_completion.py @@ -2,8 +2,8 @@ import unittest import openai -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, diff --git 
a/test/srt/test_forward_split_prefill.py b/test/srt/test_forward_split_prefill.py index 3100c8d00..060535687 100644 --- a/test/srt/test_forward_split_prefill.py +++ b/test/srt/test_forward_split_prefill.py @@ -13,13 +13,13 @@ import numpy as np import torch from sglang.srt.configs.model_config import ModelConfig -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.managers.schedule_batch import Req, ScheduleBatch from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_executor.model_runner import ModelRunner from sglang.srt.sampling.sampling_params import SamplingParams from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.speculative.spec_info import SpeculativeAlgorithm +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST, CustomTestCase diff --git a/test/srt/test_function_call_parser.py b/test/srt/test_function_call_parser.py index 8fd1e8bbd..b945077b9 100644 --- a/test/srt/test_function_call_parser.py +++ b/test/srt/test_function_call_parser.py @@ -15,7 +15,7 @@ from sglang.srt.function_call.mistral_detector import MistralDetector from sglang.srt.function_call.pythonic_detector import PythonicDetector from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector from sglang.srt.function_call.qwen25_detector import Qwen25Detector -from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import DEFAULT_SMALL_MODEL_NAME_FOR_TEST diff --git a/test/srt/test_patch_torch.py b/test/srt/test_patch_torch.py index a2c04509e..c1319dacb 100644 --- a/test/srt/test_patch_torch.py +++ b/test/srt/test_patch_torch.py @@ -6,7 +6,7 @@ from typing import Dict, List import torch import torch.multiprocessing as mp -from sglang.srt.patch_torch import monkey_patch_torch_reductions +from sglang.srt.utils.patch_torch import monkey_patch_torch_reductions class TestReleaseMemoryOccupation(unittest.TestCase): diff --git a/test/srt/test_sagemaker_server.py b/test/srt/test_sagemaker_server.py index 68688c112..81ab9790c 100644 --- a/test/srt/test_sagemaker_server.py +++ b/test/srt/test_sagemaker_server.py @@ -7,8 +7,8 @@ import unittest import requests -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, diff --git a/test/srt/test_session_control.py b/test/srt/test_session_control.py index 4b0da75dc..8088f7893 100644 --- a/test/srt/test_session_control.py +++ b/test/srt/test_session_control.py @@ -13,8 +13,8 @@ import unittest import aiohttp import requests -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.utils import kill_process_tree +from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.test_utils import ( DEFAULT_SMALL_MODEL_NAME_FOR_TEST, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, diff --git a/test/srt/test_srt_engine.py b/test/srt/test_srt_engine.py index a50669d48..d370f6290 100644 --- a/test/srt/test_srt_engine.py +++ b/test/srt/test_srt_engine.py @@ -12,8 +12,8 @@ import torch import sglang as sgl from sglang.bench_offline_throughput import BenchArgs, throughput_test -from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.server_args import ServerArgs 
+from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.test.few_shot_gsm8k_engine import run_eval from sglang.test.test_utils import ( DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST, diff --git a/test/srt/test_tokenizer_batch_encode.py b/test/srt/test_tokenizer_batch_encode.py index f3294c049..13d294d68 100644 --- a/test/srt/test_tokenizer_batch_encode.py +++ b/test/srt/test_tokenizer_batch_encode.py @@ -34,7 +34,9 @@ class TestTokenizerBatchEncode(unittest.TestCase): with patch("zmq.asyncio.Context"), patch( "sglang.srt.utils.get_zmq_socket" - ), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer: + ), patch( + "sglang.srt.utils.hf_transformers_utils.get_tokenizer" + ) as mock_tokenizer: mock_tokenizer.return_value = Mock(vocab_size=32000) self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args) diff --git a/test/srt/test_tokenizer_manager.py b/test/srt/test_tokenizer_manager.py index a12987279..d1817e6d9 100644 --- a/test/srt/test_tokenizer_manager.py +++ b/test/srt/test_tokenizer_manager.py @@ -31,7 +31,9 @@ class TestInputFormatDetection(unittest.TestCase): with patch("zmq.asyncio.Context"), patch( "sglang.srt.utils.get_zmq_socket" - ), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer: + ), patch( + "sglang.srt.utils.hf_transformers_utils.get_tokenizer" + ) as mock_tokenizer: mock_tokenizer.return_value = Mock(vocab_size=32000) self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args) @@ -125,7 +127,9 @@ class TestTokenizerInputPreparation(unittest.TestCase): with patch("zmq.asyncio.Context"), patch( "sglang.srt.utils.get_zmq_socket" - ), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer: + ), patch( + "sglang.srt.utils.hf_transformers_utils.get_tokenizer" + ) as mock_tokenizer: mock_tokenizer.return_value = Mock(vocab_size=32000) self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args) @@ -177,7 +181,9 @@ class TestTokenizerResultExtraction(unittest.TestCase): with patch("zmq.asyncio.Context"), patch( "sglang.srt.utils.get_zmq_socket" - ), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer: + ), patch( + "sglang.srt.utils.hf_transformers_utils.get_tokenizer" + ) as mock_tokenizer: mock_tokenizer.return_value = Mock(vocab_size=32000) self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args) @@ -279,7 +285,9 @@ class TestTokenizerManagerIntegration(unittest.TestCase): with patch("zmq.asyncio.Context"), patch( "sglang.srt.utils.get_zmq_socket" - ), patch("sglang.srt.hf_transformers_utils.get_tokenizer") as mock_tokenizer: + ), patch( + "sglang.srt.utils.hf_transformers_utils.get_tokenizer" + ) as mock_tokenizer: mock_tokenizer.return_value = Mock(vocab_size=32000) self.tokenizer_manager = TokenizerManager(self.server_args, self.port_args)
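
Note on why this move is import-compatible: the new python/sglang/srt/utils/__init__.py is a star re-export shim. The old utils.py becomes utils/common.py, and the package __init__ pulls common's public names back into sglang.srt.utils, so existing `from sglang.srt.utils import ...` call sites keep working unchanged. The sketch below is a minimal, self-contained illustration of that pattern, using a hypothetical `pkg` package and `helper` function (stand-ins, not sglang code):

```python
# Hypothetical mini-package mirroring the layout the patch introduces:
# pkg/utils.py moves to pkg/utils/common.py, and pkg/utils/__init__.py
# re-exports it so the old import path still resolves.
import sys
import tempfile
import textwrap
from pathlib import Path

root = Path(tempfile.mkdtemp())
(root / "pkg" / "utils").mkdir(parents=True)
(root / "pkg" / "__init__.py").write_text("")
(root / "pkg" / "utils" / "common.py").write_text(
    textwrap.dedent(
        """
        def helper():
            return "moved, but still importable from the old path"
        """
    )
)
# Same one-liner as the new python/sglang/srt/utils/__init__.py in the patch.
(root / "pkg" / "utils" / "__init__.py").write_text("from .common import *\n")

sys.path.insert(0, str(root))
from pkg.utils import helper                        # old-style import
from pkg.utils.common import helper as helper_new   # new canonical location

assert helper is helper_new
print(helper())
```

The same package layout is also what lets model_runner.py say `from sglang.srt.utils import slow_rank_detector` after the move: that name now resolves to the utils/slow_rank_detector.py submodule rather than to anything re-exported from common.py.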