diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 073fc514b..b49601933 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -516,8 +516,9 @@ def v1_generate_request( "regex": request.regex, "json_schema": request.json_schema, "n": request.n, - "ignore_eos": request.ignore_eos, "no_stop_trim": request.no_stop_trim, + "ignore_eos": request.ignore_eos, + "skip_special_tokens": request.skip_special_tokens, } ) return_logprobs.append(request.logprobs is not None and request.logprobs > 0) @@ -928,7 +929,9 @@ def v1_chat_generate_request( "repetition_penalty": request.repetition_penalty, "regex": request.regex, "n": request.n, + "no_stop_trim": request.no_stop_trim, "ignore_eos": request.ignore_eos, + "skip_special_tokens": request.skip_special_tokens, } if request.response_format and request.response_format.type == "json_schema": sampling_params["json_schema"] = convert_json_schema_to_str( diff --git a/python/sglang/srt/openai_api/protocol.py b/python/sglang/srt/openai_api/protocol.py index 583db66c4..3f15e229c 100644 --- a/python/sglang/srt/openai_api/protocol.py +++ b/python/sglang/srt/openai_api/protocol.py @@ -36,7 +36,7 @@ class ModelList(BaseModel): """Model list consists of model cards.""" object: str = "list" - data: List[ModelCard] = [] + data: List[ModelCard] = Field(default_factory=list) class ErrorResponse(BaseModel): @@ -143,7 +143,7 @@ class BatchResponse(BaseModel): expired_at: Optional[int] = None cancelling_at: Optional[int] = None cancelled_at: Optional[int] = None - request_counts: dict = {"total": 0, "completed": 0, "failed": 0} + request_counts: Optional[dict] = None metadata: Optional[dict] = None @@ -153,30 +153,31 @@ class CompletionRequest(BaseModel): model: str prompt: Union[List[int], List[List[int]], str, List[str]] best_of: Optional[int] = None - echo: Optional[bool] = False - frequency_penalty: Optional[float] = 0.0 + echo: bool = False + frequency_penalty: float = 0.0 logit_bias: Optional[Dict[str, float]] = None logprobs: Optional[int] = None - max_tokens: Optional[int] = 16 + max_tokens: int = 16 n: int = 1 - presence_penalty: Optional[float] = 0.0 + presence_penalty: float = 0.0 seed: Optional[int] = None - stop: Optional[Union[str, List[str]]] = Field(default_factory=list) - stream: Optional[bool] = False + stop: Optional[Union[str, List[str]]] = None + stream: bool = False stream_options: Optional[StreamOptions] = None suffix: Optional[str] = None - temperature: Optional[float] = 1.0 - top_p: Optional[float] = 1.0 + temperature: float = 1.0 + top_p: float = 1.0 user: Optional[str] = None # Extra parameters for SRT backend only and will be ignored by OpenAI models. - regex: Optional[str] = None json_schema: Optional[str] = None - ignore_eos: bool = False + regex: Optional[str] = None min_tokens: int = 0 - repetition_penalty: Optional[float] = 1.0 - stop_token_ids: Optional[List[int]] = Field(default_factory=list) - no_stop_trim: Union[bool, List[bool]] = False + repetition_penalty: float = 1.0 + stop_token_ids: Optional[List[int]] = None + no_stop_trim: bool = False + ignore_eos: bool = False + skip_special_tokens: bool = True class CompletionResponseChoice(BaseModel): @@ -259,28 +260,30 @@ class ChatCompletionRequest(BaseModel): # https://platform.openai.com/docs/api-reference/chat/create messages: List[ChatCompletionMessageParam] model: str - frequency_penalty: Optional[float] = 0.0 + frequency_penalty: float = 0.0 logit_bias: Optional[Dict[str, float]] = None - logprobs: Optional[bool] = False + logprobs: bool = False top_logprobs: Optional[int] = None max_tokens: Optional[int] = None - n: Optional[int] = 1 - presence_penalty: Optional[float] = 0.0 + n: int = 1 + presence_penalty: float = 0.0 response_format: Optional[ResponseFormat] = None seed: Optional[int] = None - stop: Optional[Union[str, List[str]]] = Field(default_factory=list) - stream: Optional[bool] = False + stop: Optional[Union[str, List[str]]] = None + stream: bool = False stream_options: Optional[StreamOptions] = None - temperature: Optional[float] = 0.7 - top_p: Optional[float] = 1.0 + temperature: float = 0.7 + top_p: float = 1.0 user: Optional[str] = None # Extra parameters for SRT backend only and will be ignored by OpenAI models. regex: Optional[str] = None - min_tokens: Optional[int] = 0 - repetition_penalty: Optional[float] = 1.0 - stop_token_ids: Optional[List[int]] = Field(default_factory=list) + min_tokens: int = 0 + repetition_penalty: float = 1.0 + stop_token_ids: Optional[List[int]] = None + no_stop_trim: bool = False ignore_eos: bool = False + skip_special_tokens: bool = True class ChatMessage(BaseModel): diff --git a/python/sglang/srt/sampling/sampling_params.py b/python/sglang/srt/sampling/sampling_params.py index a0cb8c74c..e5b876f6d 100644 --- a/python/sglang/srt/sampling/sampling_params.py +++ b/python/sglang/srt/sampling/sampling_params.py @@ -34,13 +34,13 @@ class SamplingParams: frequency_penalty: float = 0.0, presence_penalty: float = 0.0, repetition_penalty: float = 1.0, - ignore_eos: bool = False, - skip_special_tokens: bool = True, spaces_between_special_tokens: bool = True, regex: Optional[str] = None, n: int = 1, json_schema: Optional[str] = None, no_stop_trim: bool = False, + ignore_eos: bool = False, + skip_special_tokens: bool = True, ) -> None: self.temperature = temperature self.top_p = top_p