[router][protocols] Add Axum validate extractor and use it for /v1/chat/completions endpoint (#11588)

This commit is contained in:
Chang Su
2025-10-13 22:51:15 -07:00
committed by GitHub
parent e4358a4585
commit 27ef1459e6
21 changed files with 1982 additions and 2003 deletions

View File

@@ -4,8 +4,8 @@ use std::time::Instant;
use sglang_router_rs::core::{BasicWorker, BasicWorkerBuilder, Worker, WorkerType};
use sglang_router_rs::protocols::spec::{
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest,
SamplingParams, StringOrArray, UserMessageContent,
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateRequest, SamplingParams,
StringOrArray, UserMessageContent,
};
use sglang_router_rs::routers::http::pd_types::{generate_room_id, RequestWithBootstrap};
@@ -31,7 +31,6 @@ fn default_generate_request() -> GenerateRequest {
prompt: None,
input_ids: None,
stream: false,
parameters: None,
sampling_params: None,
return_logprob: false,
// SGLang Extensions
@@ -101,14 +100,6 @@ fn default_completion_request() -> CompletionRequest {
fn create_sample_generate_request() -> GenerateRequest {
GenerateRequest {
text: Some("Write a story about artificial intelligence".to_string()),
parameters: Some(GenerateParameters {
max_new_tokens: Some(100),
temperature: Some(0.8),
top_p: Some(0.9),
top_k: Some(50),
repetition_penalty: Some(1.0),
..Default::default()
}),
sampling_params: Some(SamplingParams {
temperature: Some(0.8),
top_p: Some(0.9),
@@ -128,12 +119,10 @@ fn create_sample_chat_completion_request() -> ChatCompletionRequest {
model: "gpt-3.5-turbo".to_string(),
messages: vec![
ChatMessage::System {
role: "system".to_string(),
content: "You are a helpful assistant".to_string(),
name: None,
},
ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Text(
"Explain quantum computing in simple terms".to_string(),
),
@@ -170,7 +159,6 @@ fn create_sample_completion_request() -> CompletionRequest {
#[allow(deprecated)]
fn create_large_chat_completion_request() -> ChatCompletionRequest {
let mut messages = vec![ChatMessage::System {
role: "system".to_string(),
content: "You are a helpful assistant with extensive knowledge.".to_string(),
name: None,
}];
@@ -178,12 +166,10 @@ fn create_large_chat_completion_request() -> ChatCompletionRequest {
// Add many user/assistant pairs to simulate a long conversation
for i in 0..50 {
messages.push(ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Text(format!("Question {}: What do you think about topic number {} which involves complex reasoning about multiple interconnected systems and their relationships?", i, i)),
name: None,
});
messages.push(ChatMessage::Assistant {
role: "assistant".to_string(),
content: Some(format!("Answer {}: This is a detailed response about topic {} that covers multiple aspects and provides comprehensive analysis of the interconnected systems you mentioned.", i, i)),
name: None,
tool_calls: None,