[router] Support history management using conversation (#11339)

This commit is contained in:
Keyang Ru
2025-10-08 15:24:02 -07:00
committed by GitHub
parent a1080b72a0
commit 7ac6b900f4
15 changed files with 1529 additions and 38 deletions

View File

@@ -125,6 +125,7 @@ async fn test_non_streaming_mcp_minimal_e2e_with_persistence() {
top_k: -1,
min_p: 0.0,
repetition_penalty: 1.0,
conversation: None,
};
let resp = router
@@ -371,6 +372,7 @@ fn test_responses_request_creation() {
top_k: -1,
min_p: 0.0,
repetition_penalty: 1.0,
conversation: None,
};
assert!(!request.is_stream());
@@ -411,6 +413,7 @@ fn test_sampling_params_conversion() {
top_k: 10,
min_p: 0.05,
repetition_penalty: 1.1,
conversation: None,
};
let params = request.to_sampling_params(1000, None);
@@ -524,6 +527,7 @@ fn test_json_serialization() {
top_k: 50,
min_p: 0.1,
repetition_penalty: 1.2,
conversation: None,
};
let json = serde_json::to_string(&request).expect("Serialization should work");
@@ -651,6 +655,7 @@ async fn test_multi_turn_loop_with_mcp() {
top_k: 50,
min_p: 0.0,
repetition_penalty: 1.0,
conversation: None,
};
// Execute the request (this should trigger the multi-turn loop)
@@ -828,6 +833,7 @@ async fn test_max_tool_calls_limit() {
top_k: 50,
min_p: 0.0,
repetition_penalty: 1.0,
conversation: None,
};
let response = router.route_responses(None, &req, None).await;
@@ -1023,6 +1029,7 @@ async fn test_streaming_with_mcp_tool_calls() {
top_k: 50,
min_p: 0.0,
repetition_penalty: 1.0,
conversation: None,
};
let response = router.route_responses(None, &req, None).await;
@@ -1301,6 +1308,7 @@ async fn test_streaming_multi_turn_with_mcp() {
top_k: 50,
min_p: 0.0,
repetition_penalty: 1.0,
conversation: None,
};
let response = router.route_responses(None, &req, None).await;

View File

@@ -9,6 +9,7 @@ use axum::{
Json, Router,
};
use serde_json::json;
use sglang_router_rs::data_connector::MemoryConversationItemStorage;
use sglang_router_rs::{
config::{
ConfigError, ConfigValidator, HistoryBackend, OracleConfig, RouterConfig, RoutingMode,
@@ -95,6 +96,7 @@ async fn test_openai_router_creation() {
None,
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await;
@@ -113,6 +115,7 @@ async fn test_openai_router_server_info() {
None,
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -143,6 +146,7 @@ async fn test_openai_router_models() {
None,
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -222,6 +226,7 @@ async fn test_openai_router_responses_with_mock() {
None,
storage.clone(),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -482,6 +487,7 @@ async fn test_openai_router_responses_streaming_with_mock() {
None,
storage.clone(),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -586,6 +592,7 @@ async fn test_unsupported_endpoints() {
None,
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -627,6 +634,7 @@ async fn test_openai_router_chat_completion_with_mock() {
None,
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -669,6 +677,7 @@ async fn test_openai_e2e_with_server() {
None,
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -739,6 +748,7 @@ async fn test_openai_router_chat_streaming_with_mock() {
None,
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -792,6 +802,7 @@ async fn test_openai_router_circuit_breaker() {
Some(cb_config),
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();
@@ -820,6 +831,7 @@ async fn test_openai_router_models_auth_forwarding() {
None,
Arc::new(MemoryResponseStorage::new()),
Arc::new(MemoryConversationStorage::new()),
Arc::new(sglang_router_rs::data_connector::MemoryConversationItemStorage::new()),
)
.await
.unwrap();