From 033b75f559011291c4148dfc63773b7c8852b9d2 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 10 Sep 2025 16:58:59 -0700 Subject: [PATCH] [Auto Sync] Update serving_base.py, serving_chat.py, servin... (20250910) (#10282) Co-authored-by: github-actions[bot] Co-authored-by: cctry --- python/sglang/srt/entrypoints/openai/serving_base.py | 8 ++++++-- python/sglang/srt/entrypoints/openai/serving_chat.py | 10 +++++++--- .../srt/entrypoints/openai/serving_completions.py | 10 +++++++--- .../sglang/srt/entrypoints/openai/serving_embedding.py | 10 +++++++--- .../sglang/srt/entrypoints/openai/serving_responses.py | 9 ++++++--- 5 files changed, 33 insertions(+), 14 deletions(-) diff --git a/python/sglang/srt/entrypoints/openai/serving_base.py b/python/sglang/srt/entrypoints/openai/serving_base.py index ad7c35f20..28b317e6d 100644 --- a/python/sglang/srt/entrypoints/openai/serving_base.py +++ b/python/sglang/srt/entrypoints/openai/serving_base.py @@ -1,15 +1,19 @@ +from __future__ import annotations + import json import logging import uuid from abc import ABC, abstractmethod -from typing import Any, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union from fastapi import HTTPException, Request from fastapi.responses import ORJSONResponse, StreamingResponse from sglang.srt.entrypoints.openai.protocol import ErrorResponse, OpenAIServingRequest from sglang.srt.managers.io_struct import GenerateReqInput -from sglang.srt.managers.tokenizer_manager import TokenizerManager + +if TYPE_CHECKING: + from sglang.srt.managers.tokenizer_manager import TokenizerManager logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/entrypoints/openai/serving_chat.py b/python/sglang/srt/entrypoints/openai/serving_chat.py index 215c61c36..d67cbfde3 100644 --- a/python/sglang/srt/entrypoints/openai/serving_chat.py +++ b/python/sglang/srt/entrypoints/openai/serving_chat.py @@ -1,9 +1,11 @@ +from __future__ import annotations + import copy import json import logging import time import uuid -from typing import Any, AsyncGenerator, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union from fastapi import Request from fastapi.responses import ORJSONResponse, StreamingResponse @@ -33,13 +35,15 @@ from sglang.srt.entrypoints.openai.utils import ( ) from sglang.srt.function_call.function_call_parser import FunctionCallParser from sglang.srt.managers.io_struct import GenerateReqInput -from sglang.srt.managers.template_manager import TemplateManager -from sglang.srt.managers.tokenizer_manager import TokenizerManager from sglang.srt.parser.conversation import generate_chat_conv from sglang.srt.parser.jinja_template_utils import process_content_for_template_format from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.utils import convert_json_schema_to_str +if TYPE_CHECKING: + from sglang.srt.managers.template_manager import TemplateManager + from sglang.srt.managers.tokenizer_manager import TokenizerManager + logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/entrypoints/openai/serving_completions.py b/python/sglang/srt/entrypoints/openai/serving_completions.py index 82d1832c2..6fe02d325 100644 --- a/python/sglang/srt/entrypoints/openai/serving_completions.py +++ b/python/sglang/srt/entrypoints/openai/serving_completions.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import logging import time -from typing import Any, AsyncGenerator, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union from fastapi import Request from fastapi.responses import ORJSONResponse, StreamingResponse @@ -20,13 +22,15 @@ from sglang.srt.entrypoints.openai.utils import ( to_openai_style_logprobs, ) from sglang.srt.managers.io_struct import GenerateReqInput -from sglang.srt.managers.template_manager import TemplateManager -from sglang.srt.managers.tokenizer_manager import TokenizerManager from sglang.srt.parser.code_completion_parser import ( generate_completion_prompt_from_request, ) from sglang.utils import convert_json_schema_to_str +if TYPE_CHECKING: + from sglang.srt.managers.template_manager import TemplateManager + from sglang.srt.managers.tokenizer_manager import TokenizerManager + logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/entrypoints/openai/serving_embedding.py b/python/sglang/srt/entrypoints/openai/serving_embedding.py index 597623ae1..63c4fc34a 100644 --- a/python/sglang/srt/entrypoints/openai/serving_embedding.py +++ b/python/sglang/srt/entrypoints/openai/serving_embedding.py @@ -1,4 +1,6 @@ -from typing import Any, Dict, List, Optional, Union +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from fastapi import Request from fastapi.responses import ORJSONResponse @@ -13,10 +15,12 @@ from sglang.srt.entrypoints.openai.protocol import ( ) from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase from sglang.srt.managers.io_struct import EmbeddingReqInput -from sglang.srt.managers.template_manager import TemplateManager -from sglang.srt.managers.tokenizer_manager import TokenizerManager from sglang.srt.parser.conversation import generate_embedding_convs +if TYPE_CHECKING: + from sglang.srt.managers.template_manager import TemplateManager + from sglang.srt.managers.tokenizer_manager import TokenizerManager + class OpenAIServingEmbedding(OpenAIServingBase): """Handler for v1/embeddings requests""" diff --git a/python/sglang/srt/entrypoints/openai/serving_responses.py b/python/sglang/srt/entrypoints/openai/serving_responses.py index ef9b3d9ed..3f7619678 100644 --- a/python/sglang/srt/entrypoints/openai/serving_responses.py +++ b/python/sglang/srt/entrypoints/openai/serving_responses.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # Adapted from vLLM's OpenAIServingResponses """Handler for /v1/responses requests""" +from __future__ import annotations import asyncio import copy @@ -9,7 +10,7 @@ import logging import time from contextlib import AsyncExitStack from http import HTTPStatus -from typing import Any, AsyncGenerator, AsyncIterator, Optional, Union +from typing import TYPE_CHECKING, Any, AsyncGenerator, AsyncIterator, Optional, Union import jinja2 import openai.types.responses as openai_responses_types @@ -54,11 +55,13 @@ from sglang.srt.entrypoints.openai.protocol import ( from sglang.srt.entrypoints.openai.serving_chat import OpenAIServingChat from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer from sglang.srt.managers.io_struct import GenerateReqInput -from sglang.srt.managers.template_manager import TemplateManager -from sglang.srt.managers.tokenizer_manager import TokenizerManager from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.utils import random_uuid +if TYPE_CHECKING: + from sglang.srt.managers.template_manager import TemplateManager + from sglang.srt.managers.tokenizer_manager import TokenizerManager + logger = logging.getLogger(__name__)