From 51ae40306a9a607e935881a3fdfbccc46505cc84 Mon Sep 17 00:00:00 2001 From: Shuaiyi Zhang <576893949@qq.com> Date: Tue, 8 Jul 2025 23:03:38 +0800 Subject: [PATCH] [router] forward stream_options in request (#7860) --- sgl-router/benches/request_processing.rs | 3 +++ sgl-router/src/openai_api_types.rs | 14 ++++++++++++++ sgl-router/tests/benchmark_integration.rs | 4 ++++ 3 files changed, 21 insertions(+) diff --git a/sgl-router/benches/request_processing.rs b/sgl-router/benches/request_processing.rs index 5761e8e1c..c2cee90d5 100644 --- a/sgl-router/benches/request_processing.rs +++ b/sgl-router/benches/request_processing.rs @@ -59,6 +59,7 @@ fn create_sample_chat_completion_request() -> ChatCompletionRequest { top_p: Some(1.0), n: Some(1), stream: false, + stream_options: None, stop: None, presence_penalty: Some(0.0), frequency_penalty: Some(0.0), @@ -86,6 +87,7 @@ fn create_sample_completion_request() -> CompletionRequest { top_p: Some(1.0), n: Some(1), stream: false, + stream_options: None, logprobs: None, echo: false, stop: None, @@ -130,6 +132,7 @@ fn create_large_chat_completion_request() -> ChatCompletionRequest { top_p: Some(0.95), n: Some(1), stream: false, + stream_options: None, stop: None, presence_penalty: Some(0.1), frequency_penalty: Some(0.1), diff --git a/sgl-router/src/openai_api_types.rs b/sgl-router/src/openai_api_types.rs index 808f8b46f..9870fd06b 100644 --- a/sgl-router/src/openai_api_types.rs +++ b/sgl-router/src/openai_api_types.rs @@ -52,6 +52,10 @@ pub struct CompletionRequest { #[serde(default)] pub stream: bool, + /// Options for streaming response + #[serde(skip_serializing_if = "Option::is_none")] + pub stream_options: Option<StreamOptions>, + /// Include the log probabilities on the logprobs most likely tokens #[serde(skip_serializing_if = "Option::is_none")] pub logprobs: Option<u32>, @@ -132,6 +136,10 @@ pub struct ChatCompletionRequest { #[serde(default)] pub stream: bool, + /// Options for streaming response + #[serde(skip_serializing_if = 
"Option::is_none")] + pub stream_options: Option<StreamOptions>, + /// Up to 4 sequences where the API will stop generating further tokens #[serde(skip_serializing_if = "Option::is_none")] pub stop: Option<StringOrArray>, @@ -258,6 +266,12 @@ pub struct ImageUrl { pub detail: Option<String>, // "auto", "low", or "high" } +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct StreamOptions { + #[serde(skip_serializing_if = "Option::is_none")] + pub include_usage: Option<bool>, +} + #[derive(Debug, Clone, Deserialize, Serialize)] #[serde(tag = "type")] pub enum ResponseFormat { diff --git a/sgl-router/tests/benchmark_integration.rs b/sgl-router/tests/benchmark_integration.rs index a57714b0d..b21c93fcf 100644 --- a/sgl-router/tests/benchmark_integration.rs +++ b/sgl-router/tests/benchmark_integration.rs @@ -50,6 +50,7 @@ fn test_benchmark_request_creation() { top_p: Some(1.0), n: Some(1), stream: false, + stream_options: None, stop: None, presence_penalty: Some(0.0), frequency_penalty: Some(0.0), @@ -75,6 +76,7 @@ fn test_benchmark_request_creation() { top_p: Some(1.0), n: Some(1), stream: false, + stream_options: None, logprobs: None, echo: false, stop: None, @@ -143,6 +145,7 @@ fn test_benchmark_request_adaptation() { top_p: Some(1.0), n: Some(1), stream: false, + stream_options: None, stop: None, presence_penalty: Some(0.0), frequency_penalty: Some(0.0), @@ -168,6 +171,7 @@ fn test_benchmark_request_adaptation() { top_p: Some(1.0), n: Some(1), stream: false, + stream_options: None, logprobs: None, echo: false, stop: None,