router-spec: Reorder ChatCompletionRequest and fix validation logic (#10675)

Chang Su authored 2025-09-19 16:41:21 -07:00, committed by GitHub
parent 00eb5eb721
commit 03ce92e594
3 changed files with 150 additions and 158 deletions


@@ -179,26 +179,94 @@ pub struct FunctionCallDelta {
 // ============= Request =============
-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, Default)]
 pub struct ChatCompletionRequest {
-    /// ID of the model to use
-    pub model: String,
     /// A list of messages comprising the conversation so far
     pub messages: Vec<ChatMessage>,
-    /// What sampling temperature to use, between 0 and 2
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>,
-    /// An alternative to sampling with temperature
+    /// ID of the model to use
+    pub model: String,
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>,
+    pub frequency_penalty: Option<f32>,
+    /// Deprecated: Replaced by tool_choice
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use tool_choice instead")]
+    pub function_call: Option<FunctionCall>,
+    /// Deprecated: Replaced by tools
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use tools instead")]
+    pub functions: Option<Vec<Function>>,
+    /// Modify the likelihood of specified tokens appearing in the completion
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub logit_bias: Option<HashMap<String, f32>>,
+    /// Whether to return log probabilities of the output tokens
+    #[serde(default)]
+    pub logprobs: bool,
+    /// Deprecated: Replaced by max_completion_tokens
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "Use max_completion_tokens instead")]
+    pub max_tokens: Option<u32>,
+    /// An upper bound for the number of tokens that can be generated for a completion
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_completion_tokens: Option<u32>,
+    /// Developer-defined tags and values used for filtering completions in the dashboard
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+    /// Output types that you would like the model to generate for this request
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub modalities: Option<Vec<String>>,
+    /// How many chat completion choices to generate for each input message
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub n: Option<u32>,
+    /// Whether to enable parallel function calling during tool use
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub presence_penalty: Option<f32>,
+    /// Cache key for prompts (beta feature)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_cache_key: Option<String>,
+    /// Effort level for reasoning models (low, medium, high)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning_effort: Option<String>,
+    /// An object specifying the format that the model must output
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response_format: Option<ResponseFormat>,
+    /// Safety identifier for content moderation
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub safety_identifier: Option<String>,
+    /// Deprecated: This feature is in Legacy mode
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[deprecated(note = "This feature is in Legacy mode")]
+    pub seed: Option<i64>,
+    /// The service tier to use for this request
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub service_tier: Option<String>,
+    /// Up to 4 sequences where the API will stop generating further tokens
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stop: Option<StringOrArray>,
     /// If set, partial message deltas will be sent
     #[serde(default)]
     pub stream: bool,
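
The switch to #[derive(..., Default)] in this hunk means a request can now be built with struct-update syntax, naming only the fields under test. A minimal sketch (the usage is illustrative, not taken from this commit):

// Every Option field defaults to None and the plain bool fields
// to false, so only the interesting fields need to be spelled out.
let req = ChatCompletionRequest {
    model: "my-model".to_string(),
    messages: vec![],
    temperature: Some(0.7),
    ..Default::default()
};
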
@@ -207,69 +275,29 @@ pub struct ChatCompletionRequest {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub stream_options: Option<StreamOptions>,
-    /// Up to 4 sequences where the API will stop generating further tokens
+    /// What sampling temperature to use, between 0 and 2
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub stop: Option<StringOrArray>,
-    /// The maximum number of tokens to generate
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_tokens: Option<u32>,
-    /// An upper bound for the number of tokens that can be generated for a completion
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_completion_tokens: Option<u32>,
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub presence_penalty: Option<f32>,
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub frequency_penalty: Option<f32>,
-    /// Modify the likelihood of specified tokens appearing in the completion
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub logit_bias: Option<HashMap<String, f32>>,
-    /// A unique identifier representing your end-user
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-    /// If specified, our system will make a best effort to sample deterministically
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub seed: Option<i64>,
-    /// Whether to return log probabilities of the output tokens
-    #[serde(default)]
-    pub logprobs: bool,
-    /// An integer between 0 and 20 specifying the number of most likely tokens to return
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_logprobs: Option<u32>,
-    /// An object specifying the format that the model must output
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub response_format: Option<ResponseFormat>,
-    /// A list of tools the model may call
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tools: Option<Vec<Tool>>,
+    pub temperature: Option<f32>,
     /// Controls which (if any) tool is called by the model
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_choice: Option<ToolChoice>,
-    /// Whether to enable parallel function calling during tool use
+    /// A list of tools the model may call
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub parallel_tool_calls: Option<bool>,
-    /// Deprecated: use tools instead
+    pub tools: Option<Vec<Tool>>,
+    /// An integer between 0 and 20 specifying the number of most likely tokens to return
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub functions: Option<Vec<Function>>,
-    /// Deprecated: use tool_choice instead
+    pub top_logprobs: Option<u32>,
+    /// An alternative to sampling with temperature
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub function_call: Option<FunctionCall>,
+    pub top_p: Option<f32>,
     /// Verbosity level for debugging
     #[serde(skip_serializing_if = "Option::is_none")]
     pub verbosity: Option<i32>,
+    // ============= SGLang Extensions =============
     /// Top-k sampling parameter (-1 to disable)
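
The serde attributes repeated throughout the struct define the wire behavior: skip_serializing_if = "Option::is_none" drops unset optional fields from the serialized JSON, while #[serde(default)] lets clients omit stream and logprobs entirely. A rough round-trip sketch, assuming serde_json is available as in the router's tests:

// None-valued options disappear from the output JSON, so a
// default-built request serializes to a very small object.
let req = ChatCompletionRequest {
    model: "test-model".to_string(),
    ..Default::default()
};
let json = serde_json::to_string(&req).unwrap();
assert!(!json.contains("temperature")); // skipped: it is None

// Fields marked #[serde(default)] may be absent on input and
// parse to their default (false for `stream`).
let parsed: ChatCompletionRequest =
    serde_json::from_str(r#"{"model":"test-model","messages":[]}"#).unwrap();
assert!(!parsed.stream);
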
@@ -316,7 +344,6 @@ pub struct ChatCompletionRequest {
     #[serde(default = "default_true")]
     pub skip_special_tokens: bool,
-    // ============= SGLang Extensions =============
     /// Path to LoRA adapter(s) for model customization
     #[serde(skip_serializing_if = "Option::is_none")]
     pub lora_path: Option<LoRAPath>,
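
The default = "default_true" attribute on skip_special_tokens names a helper defined elsewhere in the file; by serde convention it is simply a function returning the default value:

// Presumed body of the helper referenced by #[serde(default = "default_true")];
// the definition itself sits outside this diff.
fn default_true() -> bool {
    true
}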