diff --git a/sgl-router/benches/request_processing.rs b/sgl-router/benches/request_processing.rs
index 3579d9c67..3d2d55713 100644
--- a/sgl-router/benches/request_processing.rs
+++ b/sgl-router/benches/request_processing.rs
@@ -48,50 +48,22 @@ fn default_generate_request() -> GenerateRequest {
 }
 
 /// Create a default ChatCompletionRequest for benchmarks with minimal fields set
+#[allow(deprecated)]
 fn default_chat_completion_request() -> ChatCompletionRequest {
     ChatCompletionRequest {
-        model: String::new(),
+        // Required fields in OpenAI order
         messages: vec![],
-        max_tokens: None,
-        max_completion_tokens: None,
-        temperature: None,
-        top_p: None,
-        n: None,
-        stream: false,
-        stream_options: None,
-        stop: None,
-        presence_penalty: None,
-        frequency_penalty: None,
-        logit_bias: None,
-        logprobs: false,
-        top_logprobs: None,
-        user: None,
-        response_format: None,
-        seed: None,
-        tools: None,
-        tool_choice: None,
-        parallel_tool_calls: None,
-        function_call: None,
-        functions: None,
-        // SGLang Extensions
-        top_k: None,
-        min_p: None,
-        min_tokens: None,
-        repetition_penalty: None,
-        regex: None,
-        ebnf: None,
-        stop_token_ids: None,
-        no_stop_trim: false,
-        ignore_eos: false,
-        continue_final_message: false,
-        skip_special_tokens: true,
-        // SGLang Extensions
-        lora_path: None,
-        session_params: None,
-        separate_reasoning: true,
-        stream_reasoning: true,
-        chat_template_kwargs: None,
-        return_hidden_states: false,
+        model: String::new(),
+
+        // The derived `Default` sets every bool to `false`, but the fixture this
+        // replaces used `true` for these flags; keep them explicit so the
+        // benchmark input is unchanged.
+        skip_special_tokens: true,
+        separate_reasoning: true,
+        stream_reasoning: true,
+
+        // Use default for all other fields
+        ..Default::default()
     }
 }
 
@@ -161,6 +133,7 @@ fn create_sample_generate_request() -> GenerateRequest {
     }
 }
 
+#[allow(deprecated)]
 fn create_sample_chat_completion_request() -> ChatCompletionRequest {
     ChatCompletionRequest {
         model: "gpt-3.5-turbo".to_string(),
@@ -205,6 +178,7 @@ fn create_sample_completion_request() -> CompletionRequest {
     }
 }
 
+#[allow(deprecated)]
 fn create_large_chat_completion_request() -> ChatCompletionRequest {
     let mut messages = vec![ChatMessage::System {
         role: "system".to_string(),
@@ -240,7 +214,6 @@ fn create_large_chat_completion_request() -> ChatCompletionRequest {
         presence_penalty: Some(0.1),
         frequency_penalty: Some(0.1),
         top_logprobs: Some(5),
-        user: Some("benchmark_user".to_string()),
         seed: Some(42),
         parallel_tool_calls: Some(true),
         ..default_chat_completion_request()
diff --git a/sgl-router/src/protocols/spec.rs b/sgl-router/src/protocols/spec.rs
index 4760626b5..cb1f0a992 100644
--- a/sgl-router/src/protocols/spec.rs
+++ b/sgl-router/src/protocols/spec.rs
@@ -179,26 +179,94 @@ pub struct FunctionCallDelta {
 
 // ============= Request =============
 
-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, Default)]
 pub struct ChatCompletionRequest {
-    /// ID of the model to use
-    pub model: String,
-
     /// A list of messages comprising the conversation so far
     pub messages: Vec<ChatMessage>,
 
-    /// What sampling temperature to use, between 0 and 2
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>,
+    /// ID of the model to use
+    pub model: String,
 
-    /// An alternative to sampling with temperature
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>,
+    pub frequency_penalty: Option<f32>,
+
+    /// Deprecated: Replaced by tool_choice
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use tool_choice instead")]
+    pub function_call: Option<FunctionCall>,
+
+    /// Deprecated: Replaced by tools
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use tools instead")]
+    pub functions: Option<Vec<Function>>,
+
+    /// Modify the likelihood of specified tokens appearing in the completion
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub logit_bias: Option<HashMap<String, f32>>,
+
+    /// Whether to return log probabilities of the output tokens
+    #[serde(default)]
+    pub logprobs: bool,
+
+    /// Deprecated: Replaced by max_completion_tokens
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use max_completion_tokens instead")]
+    pub max_tokens: Option<u32>,
+
+    /// An upper bound for the number of tokens that can be generated for a completion
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_completion_tokens: Option<u32>,
+
+    /// Developer-defined tags and values used for filtering completions in the dashboard
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+
+    /// Output types that you would like the model to generate for this request
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub modalities: Option<Vec<String>>,
 
     /// How many chat completion choices to generate for each input message
     #[serde(skip_serializing_if = "Option::is_none")]
     pub n: Option<u32>,
 
+    /// Whether to enable parallel function calling during tool use
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub presence_penalty: Option<f32>,
+
+    /// Cache key for prompts (beta feature)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_cache_key: Option<String>,
+
+    /// Effort level for reasoning models (low, medium, high)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning_effort: Option<String>,
+
+    /// An object specifying the format that the model must output
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response_format: Option<ResponseFormat>,
+
+    /// Safety identifier for content moderation
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub safety_identifier: Option<String>,
+
+    /// Deprecated: This feature is in Legacy mode
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "This feature is in Legacy mode")]
+    pub seed: Option<i64>,
+
+    /// The service tier to use for this request
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub service_tier: Option<String>,
+
+    /// Up to 4 sequences where the API will stop generating further tokens
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stop: Option<StringOrArray>,
+
     /// If set, partial message deltas will be sent
     #[serde(default)]
     pub stream: bool,
@@ -207,69 +275,29 @@ pub struct ChatCompletionRequest {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub stream_options: Option<StreamOptions>,
 
-    /// Up to 4 sequences where the API will stop generating further tokens
+    /// What sampling temperature to use, between 0 and 2
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub stop: Option<StringOrArray>,
-
-    /// The maximum number of tokens to generate
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_tokens: Option<u32>,
-
-    /// An upper bound for the number of tokens that can be generated for a completion
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_completion_tokens: Option<u32>,
-
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub presence_penalty: Option<f32>,
-
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub frequency_penalty: Option<f32>,
-
-    /// Modify the likelihood of specified tokens appearing in the completion
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub logit_bias: Option<HashMap<String, f32>>,
-
-    /// A unique identifier representing your end-user
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-
-    /// If specified, our system will make a best effort to sample deterministically
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub seed: Option<i64>,
-
-    /// Whether to return log probabilities of the output tokens
-    #[serde(default)]
-    pub logprobs: bool,
-
-    /// An integer between 0 and 20 specifying the number of most likely tokens to return
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_logprobs: Option<u32>,
-
-    /// An object specifying the format that the model must output
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub response_format: Option<ResponseFormat>,
-
-    /// A list of tools the model may call
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tools: Option<Vec<Tool>>,
+    pub temperature: Option<f32>,
 
     /// Controls which (if any) tool is called by the model
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_choice: Option<ToolChoice>,
 
-    /// Whether to enable parallel function calling during tool use
+    /// A list of tools the model may call
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub parallel_tool_calls: Option<bool>,
+    pub tools: Option<Vec<Tool>>,
 
-    /// Deprecated: use tools instead
+    /// An integer between 0 and 20 specifying the number of most likely tokens to return
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub functions: Option<Vec<Function>>,
+    pub top_logprobs: Option<u32>,
 
-    /// Deprecated: use tool_choice instead
+    /// An alternative to sampling with temperature
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub function_call: Option<FunctionCall>,
+    pub top_p: Option<f32>,
+
+    /// Verbosity level for debugging
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub verbosity: Option<i32>,
 
     // ============= SGLang Extensions =============
     /// Top-k sampling parameter (-1 to disable)
@@ -316,7 +344,8 @@ pub struct ChatCompletionRequest {
     #[serde(default = "default_true")]
+    // NOTE: `default_true` is only applied when deserializing; the derived
+    // `Default` impl uses `bool::default()` (`false`) for this field.
     pub skip_special_tokens: bool,
 
-    // ============= SGLang Extensions =============
     /// Path to LoRA adapter(s) for model customization
     #[serde(skip_serializing_if = "Option::is_none")]
     pub lora_path: Option<LoRAPath>,
diff --git a/sgl-router/src/protocols/validation.rs b/sgl-router/src/protocols/validation.rs
index 460ce2148..ee702f7db 100644
--- a/sgl-router/src/protocols/validation.rs
+++ b/sgl-router/src/protocols/validation.rs
@@ -563,6 +563,7 @@ impl StopConditionsProvider for ChatCompletionRequest {
 }
 
 impl TokenLimitsProvider for ChatCompletionRequest {
+    #[allow(deprecated)]
     fn get_max_tokens(&self) -> Option<u32> {
         // Prefer max_completion_tokens over max_tokens if both are set
         self.max_completion_tokens.or(self.max_tokens)
@@ -656,19 +657,13 @@ impl ChatCompletionRequest {
 
     /// Validate chat API specific logprobs requirements
     pub fn validate_chat_logprobs(&self) -> Result<(), ValidationError> {
-        // In chat API, if logprobs=true, top_logprobs must be specified
-        if self.logprobs && self.top_logprobs.is_none() {
-            return Err(ValidationError::MissingRequired {
-                parameter: "top_logprobs".to_string(),
-            });
-        }
-
-        // If top_logprobs is specified, logprobs should be true
-        if self.top_logprobs.is_some() && !self.logprobs {
+        // OpenAI rule: If top_logprobs is specified, logprobs must be true
+        // But logprobs=true without top_logprobs is valid (returns basic logprobs)
+        if let (Some(top_logprobs), false) = (self.top_logprobs, self.logprobs) {
             return Err(ValidationError::InvalidValue {
-                parameter: "logprobs".to_string(),
-                value: "false".to_string(),
-                reason: "must be true when top_logprobs is specified".to_string(),
+                parameter: "top_logprobs".to_string(),
+                value: top_logprobs.to_string(),
+                reason: "top_logprobs is only allowed when logprobs is enabled".to_string(),
             });
         }
 
@@ -676,6 +671,7 @@ impl ChatCompletionRequest {
     }
 
     /// Validate cross-parameter relationships specific to chat completions
+    #[allow(deprecated)]
     pub fn validate_chat_cross_parameters(&self) -> Result<(), ValidationError> {
         // Validate that both max_tokens and max_completion_tokens aren't set
         utils::validate_conflicting_parameters(
@@ -871,53 +867,24 @@ mod tests {
     mod chat_tests {
         use super::*;
 
+        #[allow(deprecated)]
         fn create_valid_chat_request() -> ChatCompletionRequest {
             ChatCompletionRequest {
-                model: "gpt-4".to_string(),
                 messages: vec![ChatMessage::User {
                     role: "user".to_string(),
                     content: UserMessageContent::Text("Hello".to_string()),
                     name: None,
                 }],
+                model: "gpt-4".to_string(),
+                // Set specific fields we want to test
                 temperature: Some(1.0),
                 top_p: Some(0.9),
                 n: Some(1),
-                stream: false,
-                stream_options: None,
-                stop: None,
                 max_tokens: Some(100),
-                max_completion_tokens: None,
-                presence_penalty: Some(0.0),
                 frequency_penalty: Some(0.0),
-                logit_bias: None,
-                user: None,
-                seed: None,
-                logprobs: false,
-                top_logprobs: None,
-                response_format: None,
-                tools: None,
-                tool_choice: None,
-                parallel_tool_calls: None,
-                functions: None,
-                function_call: None,
-                // SGLang extensions
-                top_k: None,
-                min_p: None,
-                min_tokens: None,
-                repetition_penalty: None,
-                regex: None,
-                ebnf: None,
-                stop_token_ids: None,
-                no_stop_trim: false,
-                ignore_eos: false,
-                continue_final_message: false,
-                skip_special_tokens: true,
-                lora_path: None,
-                session_params: None,
-                separate_reasoning: true,
-                stream_reasoning: true,
-                chat_template_kwargs: None,
-                return_hidden_states: false,
+                presence_penalty: Some(0.0),
+                // Use default for all other fields
+                ..Default::default()
             }
         }
 
@@ -938,19 +905,47 @@ mod tests {
         }
 
         #[test]
-        fn test_chat_conflicts() {
+        #[allow(deprecated)]
+        fn test_chat_cross_parameter_conflicts() {
             let mut request = create_valid_chat_request();
 
-            // Conflicting max_tokens
+            // Test 1: max_tokens vs max_completion_tokens conflict
             request.max_tokens = Some(100);
             request.max_completion_tokens = Some(200);
-            assert!(request.validate().is_err());
+            assert!(
+                request.validate().is_err(),
+                "Should reject both max_tokens and max_completion_tokens"
+            );
 
-            // Logprobs without top_logprobs
+            // Reset for next test
             request.max_tokens = None;
+            request.max_completion_tokens = None;
+
+            // Test 2: tools vs functions conflict (deprecated)
+            request.tools = Some(vec![]);
+            request.functions = Some(vec![]);
+            assert!(
+                request.validate().is_err(),
+                "Should reject both tools and functions"
+            );
+
+            // Test 3: logprobs=true without top_logprobs should be valid
+            let mut request = create_valid_chat_request();
             request.logprobs = true;
             request.top_logprobs = None;
-            assert!(request.validate().is_err());
+            assert!(
+                request.validate().is_ok(),
+                "logprobs=true without top_logprobs should be valid"
+            );
+
+            // Test 4: top_logprobs without logprobs=true should fail (OpenAI rule)
+            let mut request = create_valid_chat_request();
+            request.logprobs = false;
+            request.top_logprobs = Some(5);
+            assert!(
+                request.validate().is_err(),
+                "top_logprobs without logprobs=true should fail"
+            );
         }
 
         #[test]
@@ -1097,14 +1092,17 @@ mod tests {
         fn test_logprobs_validation() {
             let mut request = create_valid_chat_request();
 
-            // Valid logprobs configuration
+            // Valid logprobs configuration with top_logprobs
             request.logprobs = true;
             request.top_logprobs = Some(10);
             assert!(request.validate().is_ok());
 
-            // logprobs=true without top_logprobs should fail
+            // logprobs=true without top_logprobs should be valid (OpenAI behavior)
             request.top_logprobs = None;
-            assert!(request.validate().is_err());
+            assert!(
+                request.validate().is_ok(),
+                "logprobs=true without top_logprobs should be valid"
+            );
 
             // top_logprobs without logprobs=true should fail
             request.logprobs = false;
@@ -1137,6 +1135,7 @@ mod tests {
         }
 
         #[test]
+        #[allow(deprecated)]
         fn test_min_max_tokens_validation() {
             let mut request = create_valid_chat_request();