Move parsers under a single folder (#9912)
This commit is contained in:
@@ -313,7 +313,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import sglang as sgl\n",
|
"import sglang as sgl\n",
|
||||||
"from sglang.srt.reasoning_parser import ReasoningParser\n",
|
"from sglang.srt.parser.reasoning_parser import ReasoningParser\n",
|
||||||
"from sglang.utils import print_highlight\n",
|
"from sglang.utils import print_highlight\n",
|
||||||
"\n",
|
"\n",
|
||||||
"llm = sgl.Engine(model_path=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\")\n",
|
"llm = sgl.Engine(model_path=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B\")\n",
|
||||||
|
|||||||
@@ -44,7 +44,7 @@
|
|||||||
"import requests\n",
|
"import requests\n",
|
||||||
"from PIL import Image\n",
|
"from PIL import Image\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from sglang.srt.conversation import chat_templates\n",
|
"from sglang.srt.parser.conversation import chat_templates\n",
|
||||||
"\n",
|
"\n",
|
||||||
"image = Image.open(\n",
|
"image = Image.open(\n",
|
||||||
" BytesIO(\n",
|
" BytesIO(\n",
|
||||||
@@ -182,7 +182,7 @@
|
|||||||
"import requests\n",
|
"import requests\n",
|
||||||
"from PIL import Image\n",
|
"from PIL import Image\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from sglang.srt.conversation import chat_templates\n",
|
"from sglang.srt.parser.conversation import chat_templates\n",
|
||||||
"\n",
|
"\n",
|
||||||
"image = Image.open(\n",
|
"image = Image.open(\n",
|
||||||
" BytesIO(\n",
|
" BytesIO(\n",
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import argparse
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.srt.conversation import chat_templates
|
from sglang.srt.parser.conversation import chat_templates
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -740,7 +740,7 @@ class StreamExecutor:
|
|||||||
# Execute the stored lazy generation calls
|
# Execute the stored lazy generation calls
|
||||||
self.backend.role_end_generate(self)
|
self.backend.role_end_generate(self)
|
||||||
|
|
||||||
from sglang.srt.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
|
|
||||||
reasoning_parser = ReasoningParser(expr.model_type)
|
reasoning_parser = ReasoningParser(expr.model_type)
|
||||||
other = expr.expr
|
other = expr.expr
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ from sglang.srt.managers.multi_tokenizer_mixin import (
|
|||||||
from sglang.srt.managers.template_manager import TemplateManager
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
from sglang.srt.managers.tokenizer_manager import ServerStatus, TokenizerManager
|
from sglang.srt.managers.tokenizer_manager import ServerStatus, TokenizerManager
|
||||||
from sglang.srt.metrics.func_timer import enable_func_timer
|
from sglang.srt.metrics.func_timer import enable_func_timer
|
||||||
from sglang.srt.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
add_api_key_middleware,
|
add_api_key_middleware,
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
|
|||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
from fastapi.responses import ORJSONResponse, StreamingResponse
|
from fastapi.responses import ORJSONResponse, StreamingResponse
|
||||||
|
|
||||||
from sglang.srt.conversation import generate_chat_conv
|
|
||||||
from sglang.srt.entrypoints.openai.protocol import (
|
from sglang.srt.entrypoints.openai.protocol import (
|
||||||
ChatCompletionRequest,
|
ChatCompletionRequest,
|
||||||
ChatCompletionResponse,
|
ChatCompletionResponse,
|
||||||
@@ -33,11 +32,12 @@ from sglang.srt.entrypoints.openai.utils import (
|
|||||||
to_openai_style_logprobs,
|
to_openai_style_logprobs,
|
||||||
)
|
)
|
||||||
from sglang.srt.function_call.function_call_parser import FunctionCallParser
|
from sglang.srt.function_call.function_call_parser import FunctionCallParser
|
||||||
from sglang.srt.jinja_template_utils import process_content_for_template_format
|
|
||||||
from sglang.srt.managers.io_struct import GenerateReqInput
|
from sglang.srt.managers.io_struct import GenerateReqInput
|
||||||
from sglang.srt.managers.template_manager import TemplateManager
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
from sglang.srt.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.conversation import generate_chat_conv
|
||||||
|
from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
|
||||||
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
from sglang.utils import convert_json_schema_to_str
|
from sglang.utils import convert_json_schema_to_str
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Union
|
|||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
from fastapi.responses import ORJSONResponse, StreamingResponse
|
from fastapi.responses import ORJSONResponse, StreamingResponse
|
||||||
|
|
||||||
from sglang.srt.code_completion_parser import generate_completion_prompt_from_request
|
|
||||||
from sglang.srt.entrypoints.openai.protocol import (
|
from sglang.srt.entrypoints.openai.protocol import (
|
||||||
CompletionRequest,
|
CompletionRequest,
|
||||||
CompletionResponse,
|
CompletionResponse,
|
||||||
@@ -23,6 +22,9 @@ from sglang.srt.entrypoints.openai.utils import (
|
|||||||
from sglang.srt.managers.io_struct import GenerateReqInput
|
from sglang.srt.managers.io_struct import GenerateReqInput
|
||||||
from sglang.srt.managers.template_manager import TemplateManager
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
|
from sglang.srt.parser.code_completion_parser import (
|
||||||
|
generate_completion_prompt_from_request,
|
||||||
|
)
|
||||||
from sglang.utils import convert_json_schema_to_str
|
from sglang.utils import convert_json_schema_to_str
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional, Union
|
|||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
from fastapi.responses import ORJSONResponse
|
from fastapi.responses import ORJSONResponse
|
||||||
|
|
||||||
from sglang.srt.conversation import generate_embedding_convs
|
|
||||||
from sglang.srt.entrypoints.openai.protocol import (
|
from sglang.srt.entrypoints.openai.protocol import (
|
||||||
EmbeddingObject,
|
EmbeddingObject,
|
||||||
EmbeddingRequest,
|
EmbeddingRequest,
|
||||||
@@ -16,6 +15,7 @@ from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
|
|||||||
from sglang.srt.managers.io_struct import EmbeddingReqInput
|
from sglang.srt.managers.io_struct import EmbeddingReqInput
|
||||||
from sglang.srt.managers.template_manager import TemplateManager
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
|
from sglang.srt.parser.conversation import generate_embedding_convs
|
||||||
|
|
||||||
|
|
||||||
class OpenAIServingEmbedding(OpenAIServingBase):
|
class OpenAIServingEmbedding(OpenAIServingBase):
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
|
|||||||
from sglang.srt.managers.io_struct import GenerateReqInput
|
from sglang.srt.managers.io_struct import GenerateReqInput
|
||||||
from sglang.srt.managers.template_manager import TemplateManager
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
from sglang.srt.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
from sglang.srt.utils import random_uuid
|
from sglang.srt.utils import random_uuid
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from sglang.srt.function_call.core_types import (
|
|||||||
ToolCallItem,
|
ToolCallItem,
|
||||||
_GetInfoFunc,
|
_GetInfoFunc,
|
||||||
)
|
)
|
||||||
from sglang.srt.harmony_parser import HarmonyParser
|
from sglang.srt.parser.harmony_parser import HarmonyParser
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -162,7 +162,6 @@ def get_deepep_config() -> str:
|
|||||||
def is_tbo_enabled() -> bool:
|
def is_tbo_enabled() -> bool:
|
||||||
global IS_TBO_ENABLED
|
global IS_TBO_ENABLED
|
||||||
if IS_TBO_ENABLED is None:
|
if IS_TBO_ENABLED is None:
|
||||||
logger.warning("IS_TBO_ENABLED is not initialized, using False")
|
|
||||||
IS_TBO_ENABLED = False
|
IS_TBO_ENABLED = False
|
||||||
return IS_TBO_ENABLED
|
return IS_TBO_ENABLED
|
||||||
|
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ from sglang.srt.mem_cache.lora_radix_cache import LoRARadixCache
|
|||||||
from sglang.srt.mem_cache.radix_cache import RadixCache
|
from sglang.srt.mem_cache.radix_cache import RadixCache
|
||||||
from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache
|
from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache
|
||||||
from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
|
from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
|
||||||
from sglang.srt.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||||
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
||||||
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
|
from sglang.srt.torch_memory_saver_adapter import TorchMemorySaverAdapter
|
||||||
|
|||||||
@@ -24,20 +24,20 @@ import os
|
|||||||
import re
|
import re
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from sglang.srt.code_completion_parser import (
|
from sglang.srt.parser.code_completion_parser import (
|
||||||
CompletionTemplate,
|
CompletionTemplate,
|
||||||
FimPosition,
|
FimPosition,
|
||||||
completion_template_exists,
|
completion_template_exists,
|
||||||
register_completion_template,
|
register_completion_template,
|
||||||
)
|
)
|
||||||
from sglang.srt.conversation import (
|
from sglang.srt.parser.conversation import (
|
||||||
Conversation,
|
Conversation,
|
||||||
SeparatorStyle,
|
SeparatorStyle,
|
||||||
chat_template_exists,
|
chat_template_exists,
|
||||||
get_conv_template_by_model_path,
|
get_conv_template_by_model_path,
|
||||||
register_conv_template,
|
register_conv_template,
|
||||||
)
|
)
|
||||||
from sglang.srt.jinja_template_utils import detect_jinja_template_content_format
|
from sglang.srt.parser.jinja_template_utils import detect_jinja_template_content_format
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -1655,7 +1655,7 @@ class ModelRunner:
|
|||||||
|
|
||||||
def apply_torch_tp(self):
|
def apply_torch_tp(self):
|
||||||
logger.info(f"Enabling torch tensor parallelism on {self.tp_size} devices.")
|
logger.info(f"Enabling torch tensor parallelism on {self.tp_size} devices.")
|
||||||
from sglang.srt.model_parallel import tensor_parallel
|
from sglang.srt.layers.model_parallel import tensor_parallel
|
||||||
|
|
||||||
device_mesh = torch.distributed.init_device_mesh(self.device, (self.tp_size,))
|
device_mesh = torch.distributed.init_device_mesh(self.device, (self.tp_size,))
|
||||||
tensor_parallel(self.model, device_mesh)
|
tensor_parallel(self.model, device_mesh)
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ Reference: https://pytorch.org/docs/stable/distributed.tensor.parallel.html
|
|||||||
|
|
||||||
Here is a quick example to enable TP:
|
Here is a quick example to enable TP:
|
||||||
```python
|
```python
|
||||||
from sglang.srt.model_parallel import tensor_parallel
|
from sglang.srt.layers.model_parallel import tensor_parallel
|
||||||
|
|
||||||
device_mesh = torch.distributed.init_device_mesh("cuda", (tp_size,))
|
device_mesh = torch.distributed.init_device_mesh("cuda", (tp_size,))
|
||||||
tensor_parallel(model, device_mesh)
|
tensor_parallel(model, device_mesh)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Dict, Optional, Tuple, Type
|
from typing import Dict, Optional, Tuple, Type
|
||||||
|
|
||||||
from sglang.srt.harmony_parser import HarmonyParser
|
from sglang.srt.parser.harmony_parser import HarmonyParser
|
||||||
|
|
||||||
|
|
||||||
class StreamingParseResult:
|
class StreamingParseResult:
|
||||||
@@ -26,7 +26,7 @@ from typing import List, Literal, Optional, Union
|
|||||||
from sglang.srt.function_call.function_call_parser import FunctionCallParser
|
from sglang.srt.function_call.function_call_parser import FunctionCallParser
|
||||||
from sglang.srt.hf_transformers_utils import check_gguf_file, get_config
|
from sglang.srt.hf_transformers_utils import check_gguf_file, get_config
|
||||||
from sglang.srt.lora.lora_registry import LoRARef
|
from sglang.srt.lora.lora_registry import LoRARef
|
||||||
from sglang.srt.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
LORA_TARGET_ALL_MODULES,
|
LORA_TARGET_ALL_MODULES,
|
||||||
SUPPORTED_LORA_TARGET_MODULES,
|
SUPPORTED_LORA_TARGET_MODULES,
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ class TestSeparateReasoningExecution(CustomTestCase):
|
|||||||
for ev in self.events:
|
for ev in self.events:
|
||||||
ev.set()
|
ev.set()
|
||||||
|
|
||||||
@patch("sglang.srt.reasoning_parser.ReasoningParser")
|
@patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
|
||||||
def test_execute_separate_reasoning(self, mock_parser_class):
|
def test_execute_separate_reasoning(self, mock_parser_class):
|
||||||
"""Test that _execute_separate_reasoning correctly calls the ReasoningParser."""
|
"""Test that _execute_separate_reasoning correctly calls the ReasoningParser."""
|
||||||
# Setup mock parser
|
# Setup mock parser
|
||||||
@@ -136,7 +136,7 @@ class TestSeparateReasoningExecution(CustomTestCase):
|
|||||||
# Verify that the text was updated
|
# Verify that the text was updated
|
||||||
self.assertEqual(executor.text_, f"[NORMAL from deepseek-r1]: {var_value}")
|
self.assertEqual(executor.text_, f"[NORMAL from deepseek-r1]: {var_value}")
|
||||||
|
|
||||||
@patch("sglang.srt.reasoning_parser.ReasoningParser")
|
@patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
|
||||||
def test_reasoning_parser_integration(self, mock_parser_class):
|
def test_reasoning_parser_integration(self, mock_parser_class):
|
||||||
"""Test the integration between separate_reasoning and ReasoningParser."""
|
"""Test the integration between separate_reasoning and ReasoningParser."""
|
||||||
# Setup mock parsers for different model types
|
# Setup mock parsers for different model types
|
||||||
@@ -167,7 +167,7 @@ class TestSeparateReasoningExecution(CustomTestCase):
|
|||||||
self.assertEqual(reasoning, f"[REASONING from qwen3]: {test_text}")
|
self.assertEqual(reasoning, f"[REASONING from qwen3]: {test_text}")
|
||||||
self.assertEqual(normal_text, f"[NORMAL from qwen3]: {test_text}")
|
self.assertEqual(normal_text, f"[NORMAL from qwen3]: {test_text}")
|
||||||
|
|
||||||
@patch("sglang.srt.reasoning_parser.ReasoningParser")
|
@patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
|
||||||
def test_reasoning_parser_invalid_model(self, mock_parser_class):
|
def test_reasoning_parser_invalid_model(self, mock_parser_class):
|
||||||
"""Test that ReasoningParser raises an error for invalid model types."""
|
"""Test that ReasoningParser raises an error for invalid model types."""
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang.srt.harmony_parser import (
|
from sglang.srt.parser.harmony_parser import (
|
||||||
CanonicalStrategy,
|
CanonicalStrategy,
|
||||||
Event,
|
Event,
|
||||||
HarmonyParser,
|
HarmonyParser,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ Unit tests for Jinja chat template utils.
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang.srt.jinja_template_utils import (
|
from sglang.srt.parser.jinja_template_utils import (
|
||||||
detect_jinja_template_content_format,
|
detect_jinja_template_content_format,
|
||||||
process_content_for_template_format,
|
process_content_for_template_format,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from sglang.srt.reasoning_parser import (
|
from sglang.srt.parser.reasoning_parser import (
|
||||||
BaseReasoningFormatDetector,
|
BaseReasoningFormatDetector,
|
||||||
DeepSeekR1Detector,
|
DeepSeekR1Detector,
|
||||||
KimiDetector,
|
KimiDetector,
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ from PIL import Image
|
|||||||
from transformers import AutoModel, AutoProcessor, AutoTokenizer
|
from transformers import AutoModel, AutoProcessor, AutoTokenizer
|
||||||
|
|
||||||
from sglang.srt.configs.model_config import ModelConfig
|
from sglang.srt.configs.model_config import ModelConfig
|
||||||
from sglang.srt.conversation import generate_chat_conv
|
|
||||||
from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest
|
from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest
|
||||||
from sglang.srt.managers.mm_utils import embed_mm_inputs, init_embedding_cache
|
from sglang.srt.managers.mm_utils import embed_mm_inputs, init_embedding_cache
|
||||||
from sglang.srt.managers.schedule_batch import (
|
from sglang.srt.managers.schedule_batch import (
|
||||||
@@ -23,6 +22,7 @@ from sglang.srt.managers.schedule_batch import (
|
|||||||
)
|
)
|
||||||
from sglang.srt.model_executor.model_runner import ModelRunner
|
from sglang.srt.model_executor.model_runner import ModelRunner
|
||||||
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
|
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
|
||||||
|
from sglang.srt.parser.conversation import generate_chat_conv
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ from transformers import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from sglang import Engine
|
from sglang import Engine
|
||||||
from sglang.srt.conversation import generate_chat_conv
|
|
||||||
from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest
|
from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest
|
||||||
|
from sglang.srt.parser.conversation import generate_chat_conv
|
||||||
|
|
||||||
TEST_IMAGE_URL = "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
|
TEST_IMAGE_URL = "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user