[Auto Sync] Update serving_base.py, serving_chat.py, servin... (20250910) (#10282)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: cctry <shiyang@x.ai>
This commit is contained in:
@@ -1,14 +1,18 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Optional, Union
|
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||||
|
|
||||||
from fastapi import HTTPException, Request
|
from fastapi import HTTPException, Request
|
||||||
from fastapi.responses import ORJSONResponse, StreamingResponse
|
from fastapi.responses import ORJSONResponse, StreamingResponse
|
||||||
|
|
||||||
from sglang.srt.entrypoints.openai.protocol import ErrorResponse, OpenAIServingRequest
|
from sglang.srt.entrypoints.openai.protocol import ErrorResponse, OpenAIServingRequest
|
||||||
from sglang.srt.managers.io_struct import GenerateReqInput
|
from sglang.srt.managers.io_struct import GenerateReqInput
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Any, AsyncGenerator, Dict, List, Optional, Union
|
from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union
|
||||||
|
|
||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
from fastapi.responses import ORJSONResponse, StreamingResponse
|
from fastapi.responses import ORJSONResponse, StreamingResponse
|
||||||
@@ -33,13 +35,15 @@ from sglang.srt.entrypoints.openai.utils import (
|
|||||||
)
|
)
|
||||||
from sglang.srt.function_call.function_call_parser import FunctionCallParser
|
from sglang.srt.function_call.function_call_parser import FunctionCallParser
|
||||||
from sglang.srt.managers.io_struct import GenerateReqInput
|
from sglang.srt.managers.io_struct import GenerateReqInput
|
||||||
from sglang.srt.managers.template_manager import TemplateManager
|
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
|
||||||
from sglang.srt.parser.conversation import generate_chat_conv
|
from sglang.srt.parser.conversation import generate_chat_conv
|
||||||
from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
|
from sglang.srt.parser.jinja_template_utils import process_content_for_template_format
|
||||||
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
from sglang.utils import convert_json_schema_to_str
|
from sglang.utils import convert_json_schema_to_str
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from typing import Any, AsyncGenerator, Dict, List, Optional, Union
|
from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Optional, Union
|
||||||
|
|
||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
from fastapi.responses import ORJSONResponse, StreamingResponse
|
from fastapi.responses import ORJSONResponse, StreamingResponse
|
||||||
@@ -20,13 +22,15 @@ from sglang.srt.entrypoints.openai.utils import (
|
|||||||
to_openai_style_logprobs,
|
to_openai_style_logprobs,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.io_struct import GenerateReqInput
|
from sglang.srt.managers.io_struct import GenerateReqInput
|
||||||
from sglang.srt.managers.template_manager import TemplateManager
|
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
|
||||||
from sglang.srt.parser.code_completion_parser import (
|
from sglang.srt.parser.code_completion_parser import (
|
||||||
generate_completion_prompt_from_request,
|
generate_completion_prompt_from_request,
|
||||||
)
|
)
|
||||||
from sglang.utils import convert_json_schema_to_str
|
from sglang.utils import convert_json_schema_to_str
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
from typing import Any, Dict, List, Optional, Union
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
from fastapi.responses import ORJSONResponse
|
from fastapi.responses import ORJSONResponse
|
||||||
@@ -13,9 +15,11 @@ from sglang.srt.entrypoints.openai.protocol import (
|
|||||||
)
|
)
|
||||||
from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
|
from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
|
||||||
from sglang.srt.managers.io_struct import EmbeddingReqInput
|
from sglang.srt.managers.io_struct import EmbeddingReqInput
|
||||||
|
from sglang.srt.parser.conversation import generate_embedding_convs
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
from sglang.srt.managers.template_manager import TemplateManager
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
from sglang.srt.parser.conversation import generate_embedding_convs
|
|
||||||
|
|
||||||
|
|
||||||
class OpenAIServingEmbedding(OpenAIServingBase):
|
class OpenAIServingEmbedding(OpenAIServingBase):
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# Adapted from vLLM's OpenAIServingResponses
|
# Adapted from vLLM's OpenAIServingResponses
|
||||||
"""Handler for /v1/responses requests"""
|
"""Handler for /v1/responses requests"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import copy
|
import copy
|
||||||
@@ -9,7 +10,7 @@ import logging
|
|||||||
import time
|
import time
|
||||||
from contextlib import AsyncExitStack
|
from contextlib import AsyncExitStack
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
from typing import Any, AsyncGenerator, AsyncIterator, Optional, Union
|
from typing import TYPE_CHECKING, Any, AsyncGenerator, AsyncIterator, Optional, Union
|
||||||
|
|
||||||
import jinja2
|
import jinja2
|
||||||
import openai.types.responses as openai_responses_types
|
import openai.types.responses as openai_responses_types
|
||||||
@@ -54,11 +55,13 @@ from sglang.srt.entrypoints.openai.protocol import (
|
|||||||
from sglang.srt.entrypoints.openai.serving_chat import OpenAIServingChat
|
from sglang.srt.entrypoints.openai.serving_chat import OpenAIServingChat
|
||||||
from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
|
from sglang.srt.entrypoints.openai.tool_server import MCPToolServer, ToolServer
|
||||||
from sglang.srt.managers.io_struct import GenerateReqInput
|
from sglang.srt.managers.io_struct import GenerateReqInput
|
||||||
from sglang.srt.managers.template_manager import TemplateManager
|
|
||||||
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
|
||||||
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||||
from sglang.srt.utils import random_uuid
|
from sglang.srt.utils import random_uuid
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from sglang.srt.managers.template_manager import TemplateManager
|
||||||
|
from sglang.srt.managers.tokenizer_manager import TokenizerManager
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user