router-spec: Reorder ChatCompletionRequest and fix validation logic (#10675)

Chang Su authored 2025-09-19 16:41:21 -07:00, committed by GitHub
parent 00eb5eb721
commit 03ce92e594
3 changed files with 150 additions and 158 deletions


@@ -179,26 +179,94 @@ pub struct FunctionCallDelta {
 // ============= Request =============
-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, Default)]
 pub struct ChatCompletionRequest {
-    /// ID of the model to use
-    pub model: String,
     /// A list of messages comprising the conversation so far
     pub messages: Vec<ChatMessage>,
-    /// What sampling temperature to use, between 0 and 2
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>,
-    /// An alternative to sampling with temperature
+    /// ID of the model to use
+    pub model: String,
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>,
+    pub frequency_penalty: Option<f32>,
+    /// Deprecated: Replaced by tool_choice
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use tool_choice instead")]
+    pub function_call: Option<FunctionCall>,
+    /// Deprecated: Replaced by tools
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use tools instead")]
+    pub functions: Option<Vec<Function>>,
+    /// Modify the likelihood of specified tokens appearing in the completion
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub logit_bias: Option<HashMap<String, f32>>,
+    /// Whether to return log probabilities of the output tokens
+    #[serde(default)]
+    pub logprobs: bool,
+    /// Deprecated: Replaced by max_completion_tokens
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use max_completion_tokens instead")]
+    pub max_tokens: Option<u32>,
+    /// An upper bound for the number of tokens that can be generated for a completion
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_completion_tokens: Option<u32>,
+    /// Developer-defined tags and values used for filtering completions in the dashboard
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+    /// Output types that you would like the model to generate for this request
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub modalities: Option<Vec<String>>,
+    /// How many chat completion choices to generate for each input message
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub n: Option<u32>,
+    /// Whether to enable parallel function calling during tool use
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub presence_penalty: Option<f32>,
+    /// Cache key for prompts (beta feature)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_cache_key: Option<String>,
+    /// Effort level for reasoning models (low, medium, high)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning_effort: Option<String>,
+    /// An object specifying the format that the model must output
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response_format: Option<ResponseFormat>,
+    /// Safety identifier for content moderation
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub safety_identifier: Option<String>,
+    /// Deprecated: This feature is in Legacy mode
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "This feature is in Legacy mode")]
+    pub seed: Option<i64>,
+    /// The service tier to use for this request
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub service_tier: Option<String>,
+    /// Up to 4 sequences where the API will stop generating further tokens
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stop: Option<StringOrArray>,
     /// If set, partial message deltas will be sent
     #[serde(default)]
     pub stream: bool,
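
The switch to #[derive(..., Default)] in this hunk means a request can now be built with struct-update syntax, naming only the fields under test. A minimal sketch (the usage is illustrative, not taken from this commit):

// Every Option field defaults to None and the plain bool fields
// to false, so only the interesting fields need to be spelled out.
let req = ChatCompletionRequest {
    model: "my-model".to_string(),
    messages: vec![],
    temperature: Some(0.7),
    ..Default::default()
};
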
@@ -207,69 +275,29 @@ pub struct ChatCompletionRequest {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub stream_options: Option<StreamOptions>,
-    /// Up to 4 sequences where the API will stop generating further tokens
+    /// What sampling temperature to use, between 0 and 2
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub stop: Option<StringOrArray>,
-    /// The maximum number of tokens to generate
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_tokens: Option<u32>,
-    /// An upper bound for the number of tokens that can be generated for a completion
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_completion_tokens: Option<u32>,
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub presence_penalty: Option<f32>,
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub frequency_penalty: Option<f32>,
-    /// Modify the likelihood of specified tokens appearing in the completion
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub logit_bias: Option<HashMap<String, f32>>,
-    /// A unique identifier representing your end-user
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-    /// If specified, our system will make a best effort to sample deterministically
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub seed: Option<i64>,
-    /// Whether to return log probabilities of the output tokens
-    #[serde(default)]
-    pub logprobs: bool,
-    /// An integer between 0 and 20 specifying the number of most likely tokens to return
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_logprobs: Option<u32>,
-    /// An object specifying the format that the model must output
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub response_format: Option<ResponseFormat>,
-    /// A list of tools the model may call
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tools: Option<Vec<Tool>>,
+    pub temperature: Option<f32>,
     /// Controls which (if any) tool is called by the model
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_choice: Option<ToolChoice>,
-    /// Whether to enable parallel function calling during tool use
+    /// A list of tools the model may call
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub parallel_tool_calls: Option<bool>,
-    /// Deprecated: use tools instead
+    pub tools: Option<Vec<Tool>>,
+    /// An integer between 0 and 20 specifying the number of most likely tokens to return
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub functions: Option<Vec<Function>>,
-    /// Deprecated: use tool_choice instead
+    pub top_logprobs: Option<u32>,
+    /// An alternative to sampling with temperature
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub function_call: Option<FunctionCall>,
+    pub top_p: Option<f32>,
     /// Verbosity level for debugging
     #[serde(skip_serializing_if = "Option::is_none")]
     pub verbosity: Option<i32>,
+    // ============= SGLang Extensions =============
     /// Top-k sampling parameter (-1 to disable)
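
The serde attributes repeated throughout the struct define the wire behavior: skip_serializing_if = "Option::is_none" drops unset optional fields from the serialized JSON, while #[serde(default)] lets clients omit stream and logprobs entirely. A rough round-trip sketch, assuming serde_json is available as in the router's tests:

// None-valued options disappear from the output JSON, so a
// default-built request serializes to a very small object.
let req = ChatCompletionRequest {
    model: "test-model".to_string(),
    ..Default::default()
};
let json = serde_json::to_string(&req).unwrap();
assert!(!json.contains("temperature")); // skipped: it is None

// Fields marked #[serde(default)] may be absent on input and
// parse to their default (false for `stream`).
let parsed: ChatCompletionRequest =
    serde_json::from_str(r#"{"model":"test-model","messages":[]}"#).unwrap();
assert!(!parsed.stream);
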
@@ -316,7 +344,6 @@ pub struct ChatCompletionRequest {
     #[serde(default = "default_true")]
     pub skip_special_tokens: bool,
-    // ============= SGLang Extensions =============
     /// Path to LoRA adapter(s) for model customization
     #[serde(skip_serializing_if = "Option::is_none")]
     pub lora_path: Option<LoRAPath>,
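
The default = "default_true" attribute on skip_special_tokens names a helper defined elsewhere in the file; by serde convention it is simply a function returning the default value:

// Presumed body of the helper referenced by #[serde(default = "default_true")];
// the definition itself sits outside this diff.
fn default_true() -> bool {
    true
}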