From 60e37f8028e726016ecaf952767876a3d41a1898 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 2 Sep 2025 18:25:04 -0700 Subject: [PATCH] Move parsers under a single folder (#9912) --- docs/advanced_features/separate_reasoning.ipynb | 2 +- docs/advanced_features/vlm_query.ipynb | 4 ++-- examples/runtime/engine/offline_batch_inference_vlm.py | 2 +- python/sglang/lang/interpreter.py | 2 +- python/sglang/srt/entrypoints/http_server.py | 2 +- python/sglang/srt/entrypoints/openai/serving_chat.py | 6 +++--- python/sglang/srt/entrypoints/openai/serving_completions.py | 4 +++- python/sglang/srt/entrypoints/openai/serving_embedding.py | 2 +- python/sglang/srt/entrypoints/openai/serving_responses.py | 2 +- python/sglang/srt/function_call/gpt_oss_detector.py | 2 +- python/sglang/srt/{ => layers}/model_parallel.py | 0 python/sglang/srt/layers/moe/utils.py | 1 - python/sglang/srt/managers/scheduler.py | 2 +- python/sglang/srt/managers/template_manager.py | 6 +++--- python/sglang/srt/model_executor/model_runner.py | 2 +- python/sglang/srt/models/torch_native_llama.py | 2 +- python/sglang/srt/{ => parser}/code_completion_parser.py | 0 python/sglang/srt/{ => parser}/conversation.py | 0 python/sglang/srt/{ => parser}/harmony_parser.py | 0 python/sglang/srt/{ => parser}/jinja_template_utils.py | 0 python/sglang/srt/{ => parser}/reasoning_parser.py | 2 +- python/sglang/srt/server_args.py | 2 +- test/lang/test_separate_reasoning_execution.py | 6 +++--- test/srt/test_harmony_parser.py | 2 +- test/srt/test_jinja_template_utils.py | 2 +- test/srt/test_reasoning_parser.py | 2 +- test/srt/test_vlm_accuracy.py | 2 +- test/srt/test_vlm_input_format.py | 2 +- 28 files changed, 31 insertions(+), 30 deletions(-) rename python/sglang/srt/{ => layers}/model_parallel.py (100%) rename python/sglang/srt/{ => parser}/code_completion_parser.py (100%) rename python/sglang/srt/{ => parser}/conversation.py (100%) rename python/sglang/srt/{ => parser}/harmony_parser.py (100%) rename python/sglang/srt/{ => parser}/jinja_template_utils.py (100%) rename python/sglang/srt/{ => parser}/reasoning_parser.py (99%) diff --git a/docs/advanced_features/separate_reasoning.ipynb b/docs/advanced_features/separate_reasoning.ipynb index 586d3a978..8850863a4 100644 --- a/docs/advanced_features/separate_reasoning.ipynb +++ b/docs/advanced_features/separate_reasoning.ipynb @@ -313,7 +313,7 @@ "outputs": [], "source": [ "import sglang as sgl\n", - "from sglang.srt.reasoning_parser import ReasoningParser\n", + "from sglang.srt.parser.reasoning_parser import ReasoningParser\n", "from sglang.utils import print_highlight\n", "\n", "llm = sgl.Engine(model_path=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\")\n", diff --git a/docs/advanced_features/vlm_query.ipynb b/docs/advanced_features/vlm_query.ipynb index b85b20212..d9a8ae75d 100644 --- a/docs/advanced_features/vlm_query.ipynb +++ b/docs/advanced_features/vlm_query.ipynb @@ -44,7 +44,7 @@ "import requests\n", "from PIL import Image\n", "\n", - "from sglang.srt.conversation import chat_templates\n", + "from sglang.srt.parser.conversation import chat_templates\n", "\n", "image = Image.open(\n", " BytesIO(\n", @@ -182,7 +182,7 @@ "import requests\n", "from PIL import Image\n", "\n", - "from sglang.srt.conversation import chat_templates\n", + "from sglang.srt.parser.conversation import chat_templates\n", "\n", "image = Image.open(\n", " BytesIO(\n", diff --git a/examples/runtime/engine/offline_batch_inference_vlm.py b/examples/runtime/engine/offline_batch_inference_vlm.py index 459a048cc..392823946 100644 --- a/examples/runtime/engine/offline_batch_inference_vlm.py +++ b/examples/runtime/engine/offline_batch_inference_vlm.py @@ -7,7 +7,7 @@ import argparse import dataclasses import sglang as sgl -from sglang.srt.conversation import chat_templates +from sglang.srt.parser.conversation import chat_templates from sglang.srt.server_args import ServerArgs diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py index ab3457cbf..8b8cdf9c5 100644 --- a/python/sglang/lang/interpreter.py +++ b/python/sglang/lang/interpreter.py @@ -740,7 +740,7 @@ class StreamExecutor: # Execute the stored lazy generation calls self.backend.role_end_generate(self) - from sglang.srt.reasoning_parser import ReasoningParser + from sglang.srt.parser.reasoning_parser import ReasoningParser reasoning_parser = ReasoningParser(expr.model_type) other = expr.expr diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py index 70d7deb1e..b0534641e 100644 --- a/python/sglang/srt/entrypoints/http_server.py +++ b/python/sglang/srt/entrypoints/http_server.py @@ -102,7 +102,7 @@ from sglang.srt.managers.multi_tokenizer_mixin import ( from sglang.srt.managers.template_manager import TemplateManager from sglang.srt.managers.tokenizer_manager import ServerStatus, TokenizerManager from sglang.srt.metrics.func_timer import enable_func_timer -from sglang.srt.reasoning_parser import ReasoningParser +from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.utils import ( add_api_key_middleware, diff --git a/python/sglang/srt/entrypoints/openai/serving_chat.py b/python/sglang/srt/entrypoints/openai/serving_chat.py index 4043203ef..690604922 100644 --- a/python/sglang/srt/entrypoints/openai/serving_chat.py +++ b/python/sglang/srt/entrypoints/openai/serving_chat.py @@ -8,7 +8,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union from fastapi import Request from fastapi.responses import ORJSONResponse, StreamingResponse -from sglang.srt.conversation import generate_chat_conv from sglang.srt.entrypoints.openai.protocol import ( ChatCompletionRequest, ChatCompletionResponse, @@ -33,11 +32,12 @@ from sglang.srt.entrypoints.openai.utils import ( to_openai_style_logprobs, ) from sglang.srt.function_call.function_call_parser import FunctionCallParser -from sglang.srt.jinja_template_utils import process_content_for_template_format from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.template_manager import TemplateManager from sglang.srt.managers.tokenizer_manager import TokenizerManager -from sglang.srt.reasoning_parser import ReasoningParser +from sglang.srt.parser.conversation import generate_chat_conv +from sglang.srt.parser.jinja_template_utils import process_content_for_template_format +from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.utils import convert_json_schema_to_str logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/entrypoints/openai/serving_completions.py b/python/sglang/srt/entrypoints/openai/serving_completions.py index 3b30f9070..82d1832c2 100644 --- a/python/sglang/srt/entrypoints/openai/serving_completions.py +++ b/python/sglang/srt/entrypoints/openai/serving_completions.py @@ -5,7 +5,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union from fastapi import Request from fastapi.responses import ORJSONResponse, StreamingResponse -from sglang.srt.code_completion_parser import generate_completion_prompt_from_request from sglang.srt.entrypoints.openai.protocol import ( CompletionRequest, CompletionResponse, @@ -23,6 +22,9 @@ from sglang.srt.entrypoints.openai.utils import ( from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.template_manager import TemplateManager from sglang.srt.managers.tokenizer_manager import TokenizerManager +from sglang.srt.parser.code_completion_parser import ( + generate_completion_prompt_from_request, +) from sglang.utils import convert_json_schema_to_str logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/entrypoints/openai/serving_embedding.py b/python/sglang/srt/entrypoints/openai/serving_embedding.py index b9ac4559f..597623ae1 100644 --- a/python/sglang/srt/entrypoints/openai/serving_embedding.py +++ b/python/sglang/srt/entrypoints/openai/serving_embedding.py @@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Union from fastapi import Request from fastapi.responses import ORJSONResponse -from sglang.srt.conversation import generate_embedding_convs from sglang.srt.entrypoints.openai.protocol import ( EmbeddingObject, EmbeddingRequest, @@ -16,6 +15,7 @@ from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase from sglang.srt.managers.io_struct import EmbeddingReqInput from sglang.srt.managers.template_manager import TemplateManager from sglang.srt.managers.tokenizer_manager import TokenizerManager +from sglang.srt.parser.conversation import generate_embedding_convs class OpenAIServingEmbedding(OpenAIServingBase): diff --git a/python/sglang/srt/entrypoints/openai/serving_responses.py b/python/sglang/srt/entrypoints/openai/serving_responses.py index 4a28fc9d3..ef9b3d9ed 100644 --- a/python/sglang/srt/entrypoints/openai/serving_responses.py +++ b/python/sglang/srt/entrypoints/openai/serving_responses.py @@ -56,7 +56,7 @@ from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.template_manager import TemplateManager from sglang.srt.managers.tokenizer_manager import TokenizerManager -from sglang.srt.reasoning_parser import ReasoningParser +from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.utils import random_uuid logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/function_call/gpt_oss_detector.py b/python/sglang/srt/function_call/gpt_oss_detector.py index 46dac5d0e..7fe0a7dc8 100644 --- a/python/sglang/srt/function_call/gpt_oss_detector.py +++ b/python/sglang/srt/function_call/gpt_oss_detector.py @@ -10,7 +10,7 @@ from sglang.srt.function_call.core_types import ( ToolCallItem, _GetInfoFunc, ) -from sglang.srt.harmony_parser import HarmonyParser +from sglang.srt.parser.harmony_parser import HarmonyParser logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/model_parallel.py b/python/sglang/srt/layers/model_parallel.py similarity index 100% rename from python/sglang/srt/model_parallel.py rename to python/sglang/srt/layers/model_parallel.py diff --git a/python/sglang/srt/layers/moe/utils.py b/python/sglang/srt/layers/moe/utils.py index 2fbab220f..1be17ea68 100644 --- a/python/sglang/srt/layers/moe/utils.py +++ b/python/sglang/srt/layers/moe/utils.py @@ -162,7 +162,6 @@ def get_deepep_config() -> str: def is_tbo_enabled() -> bool: global IS_TBO_ENABLED if IS_TBO_ENABLED is None: - logger.warning("IS_TBO_ENABLED is not initialized, using False") IS_TBO_ENABLED = False return IS_TBO_ENABLED diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index af24f941c..50f49e229 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -141,7 +141,7 @@ from sglang.srt.mem_cache.lora_radix_cache import LoRARadixCache from sglang.srt.mem_cache.radix_cache import RadixCache from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors -from sglang.srt.reasoning_parser import ReasoningParser +from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.server_args import PortArgs, ServerArgs from sglang.srt.speculative.spec_info import SpeculativeAlgorithm from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter diff --git a/python/sglang/srt/managers/template_manager.py b/python/sglang/srt/managers/template_manager.py index b4f8602c1..1d9bbea81 100644 --- a/python/sglang/srt/managers/template_manager.py +++ b/python/sglang/srt/managers/template_manager.py @@ -24,20 +24,20 @@ import os import re from typing import Optional -from sglang.srt.code_completion_parser import ( +from sglang.srt.parser.code_completion_parser import ( CompletionTemplate, FimPosition, completion_template_exists, register_completion_template, ) -from sglang.srt.conversation import ( +from sglang.srt.parser.conversation import ( Conversation, SeparatorStyle, chat_template_exists, get_conv_template_by_model_path, register_conv_template, ) -from sglang.srt.jinja_template_utils import detect_jinja_template_content_format +from sglang.srt.parser.jinja_template_utils import detect_jinja_template_content_format logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 64bb885a6..fa35fd14b 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -1655,7 +1655,7 @@ class ModelRunner: def apply_torch_tp(self): logger.info(f"Enabling torch tensor parallelism on {self.tp_size} devices.") - from sglang.srt.model_parallel import tensor_parallel + from sglang.srt.layers.model_parallel import tensor_parallel device_mesh = torch.distributed.init_device_mesh(self.device, (self.tp_size,)) tensor_parallel(self.model, device_mesh) diff --git a/python/sglang/srt/models/torch_native_llama.py b/python/sglang/srt/models/torch_native_llama.py index 630e5feb8..00499ce66 100644 --- a/python/sglang/srt/models/torch_native_llama.py +++ b/python/sglang/srt/models/torch_native_llama.py @@ -22,7 +22,7 @@ Reference: https://pytorch.org/docs/stable/distributed.tensor.parallel.html Here is a quick example to enable TP: ```python -from sglang.srt.model_parallel import tensor_parallel +from sglang.srt.layers.model_parallel import tensor_parallel device_mesh = torch.distributed.init_device_mesh("cuda", (tp_size,)) tensor_parallel(model, device_mesh) diff --git a/python/sglang/srt/code_completion_parser.py b/python/sglang/srt/parser/code_completion_parser.py similarity index 100% rename from python/sglang/srt/code_completion_parser.py rename to python/sglang/srt/parser/code_completion_parser.py diff --git a/python/sglang/srt/conversation.py b/python/sglang/srt/parser/conversation.py similarity index 100% rename from python/sglang/srt/conversation.py rename to python/sglang/srt/parser/conversation.py diff --git a/python/sglang/srt/harmony_parser.py b/python/sglang/srt/parser/harmony_parser.py similarity index 100% rename from python/sglang/srt/harmony_parser.py rename to python/sglang/srt/parser/harmony_parser.py diff --git a/python/sglang/srt/jinja_template_utils.py b/python/sglang/srt/parser/jinja_template_utils.py similarity index 100% rename from python/sglang/srt/jinja_template_utils.py rename to python/sglang/srt/parser/jinja_template_utils.py diff --git a/python/sglang/srt/reasoning_parser.py b/python/sglang/srt/parser/reasoning_parser.py similarity index 99% rename from python/sglang/srt/reasoning_parser.py rename to python/sglang/srt/parser/reasoning_parser.py index 149613bb7..f50368aed 100644 --- a/python/sglang/srt/reasoning_parser.py +++ b/python/sglang/srt/parser/reasoning_parser.py @@ -1,7 +1,7 @@ import re from typing import Dict, Optional, Tuple, Type -from sglang.srt.harmony_parser import HarmonyParser +from sglang.srt.parser.harmony_parser import HarmonyParser class StreamingParseResult: diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index eaf4a5869..c6255223d 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -26,7 +26,7 @@ from typing import List, Literal, Optional, Union from sglang.srt.function_call.function_call_parser import FunctionCallParser from sglang.srt.hf_transformers_utils import check_gguf_file, get_config from sglang.srt.lora.lora_registry import LoRARef -from sglang.srt.reasoning_parser import ReasoningParser +from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.utils import ( LORA_TARGET_ALL_MODULES, SUPPORTED_LORA_TARGET_MODULES, diff --git a/test/lang/test_separate_reasoning_execution.py b/test/lang/test_separate_reasoning_execution.py index 5bed32340..481488f6a 100644 --- a/test/lang/test_separate_reasoning_execution.py +++ b/test/lang/test_separate_reasoning_execution.py @@ -64,7 +64,7 @@ class TestSeparateReasoningExecution(CustomTestCase): for ev in self.events: ev.set() - @patch("sglang.srt.reasoning_parser.ReasoningParser") + @patch("sglang.srt.parser.reasoning_parser.ReasoningParser") def test_execute_separate_reasoning(self, mock_parser_class): """Test that _execute_separate_reasoning correctly calls the ReasoningParser.""" # Setup mock parser @@ -136,7 +136,7 @@ class TestSeparateReasoningExecution(CustomTestCase): # Verify that the text was updated self.assertEqual(executor.text_, f"[NORMAL from deepseek-r1]: {var_value}") - @patch("sglang.srt.reasoning_parser.ReasoningParser") + @patch("sglang.srt.parser.reasoning_parser.ReasoningParser") def test_reasoning_parser_integration(self, mock_parser_class): """Test the integration between separate_reasoning and ReasoningParser.""" # Setup mock parsers for different model types @@ -167,7 +167,7 @@ class TestSeparateReasoningExecution(CustomTestCase): self.assertEqual(reasoning, f"[REASONING from qwen3]: {test_text}") self.assertEqual(normal_text, f"[NORMAL from qwen3]: {test_text}") - @patch("sglang.srt.reasoning_parser.ReasoningParser") + @patch("sglang.srt.parser.reasoning_parser.ReasoningParser") def test_reasoning_parser_invalid_model(self, mock_parser_class): """Test that ReasoningParser raises an error for invalid model types.""" diff --git a/test/srt/test_harmony_parser.py b/test/srt/test_harmony_parser.py index f1193081b..20cc02e5c 100644 --- a/test/srt/test_harmony_parser.py +++ b/test/srt/test_harmony_parser.py @@ -1,6 +1,6 @@ import unittest -from sglang.srt.harmony_parser import ( +from sglang.srt.parser.harmony_parser import ( CanonicalStrategy, Event, HarmonyParser, diff --git a/test/srt/test_jinja_template_utils.py b/test/srt/test_jinja_template_utils.py index a861ac824..46e634006 100644 --- a/test/srt/test_jinja_template_utils.py +++ b/test/srt/test_jinja_template_utils.py @@ -4,7 +4,7 @@ Unit tests for Jinja chat template utils. import unittest -from sglang.srt.jinja_template_utils import ( +from sglang.srt.parser.jinja_template_utils import ( detect_jinja_template_content_format, process_content_for_template_format, ) diff --git a/test/srt/test_reasoning_parser.py b/test/srt/test_reasoning_parser.py index dca314d35..7d3f2a139 100644 --- a/test/srt/test_reasoning_parser.py +++ b/test/srt/test_reasoning_parser.py @@ -1,6 +1,6 @@ import unittest -from sglang.srt.reasoning_parser import ( +from sglang.srt.parser.reasoning_parser import ( BaseReasoningFormatDetector, DeepSeekR1Detector, KimiDetector, diff --git a/test/srt/test_vlm_accuracy.py b/test/srt/test_vlm_accuracy.py index a1eb0fc40..ef9a2ad51 100644 --- a/test/srt/test_vlm_accuracy.py +++ b/test/srt/test_vlm_accuracy.py @@ -13,7 +13,6 @@ from PIL import Image from transformers import AutoModel, AutoProcessor, AutoTokenizer from sglang.srt.configs.model_config import ModelConfig -from sglang.srt.conversation import generate_chat_conv from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest from sglang.srt.managers.mm_utils import embed_mm_inputs, init_embedding_cache from sglang.srt.managers.schedule_batch import ( @@ -23,6 +22,7 @@ from sglang.srt.managers.schedule_batch import ( ) from sglang.srt.model_executor.model_runner import ModelRunner from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor +from sglang.srt.parser.conversation import generate_chat_conv from sglang.srt.server_args import ServerArgs diff --git a/test/srt/test_vlm_input_format.py b/test/srt/test_vlm_input_format.py index 4f9ad64c3..261700da5 100644 --- a/test/srt/test_vlm_input_format.py +++ b/test/srt/test_vlm_input_format.py @@ -14,8 +14,8 @@ from transformers import ( ) from sglang import Engine -from sglang.srt.conversation import generate_chat_conv from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest +from sglang.srt.parser.conversation import generate_chat_conv TEST_IMAGE_URL = "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"