[router] allow one router to support different model families and serving modes (#10244)

This commit is contained in:
Simo Lin
2025-09-12 19:18:27 -04:00
committed by GitHub
parent 321fecab74
commit 2f173ea074
28 changed files with 3528 additions and 837 deletions

View File

@@ -197,12 +197,14 @@ async fn test_unsupported_endpoints() {
rid: None,
};
let response = router.route_generate(None, &generate_request).await;
let response = router.route_generate(None, &generate_request, None).await;
assert_eq!(response.status(), StatusCode::NOT_IMPLEMENTED);
// Test completion endpoint (should also not be supported)
let completion_request = create_minimal_completion_request();
let response = router.route_completion(None, &completion_request).await;
let response = router
.route_completion(None, &completion_request, None)
.await;
assert_eq!(response.status(), StatusCode::NOT_IMPLEMENTED);
}
@@ -228,7 +230,7 @@ async fn test_openai_router_chat_completion_with_mock() {
chat_request.temperature = Some(0.7);
// Route the request
let response = router.route_chat(None, &chat_request).await;
let response = router.route_chat(None, &chat_request, None).await;
// Should get a successful response from mock server
assert_eq!(response.status(), StatusCode::OK);
@@ -269,7 +271,9 @@ async fn test_openai_e2e_with_server() {
let chat_request: ChatCompletionRequest =
serde_json::from_str(&body_str).unwrap();
router.route_chat(Some(&parts.headers), &chat_request).await
router
.route_chat(Some(&parts.headers), &chat_request, None)
.await
}
}
}),
@@ -327,7 +331,7 @@ async fn test_openai_router_chat_streaming_with_mock() {
});
let chat_request: ChatCompletionRequest = serde_json::from_value(val).unwrap();
let response = router.route_chat(None, &chat_request).await;
let response = router.route_chat(None, &chat_request, None).await;
assert_eq!(response.status(), StatusCode::OK);
// Should be SSE
@@ -371,7 +375,7 @@ async fn test_openai_router_circuit_breaker() {
// First few requests should fail and record failures
for _ in 0..3 {
let response = router.route_chat(None, &chat_request).await;
let response = router.route_chat(None, &chat_request, None).await;
// Should get either an error or circuit breaker response
assert!(
response.status() == StatusCode::INTERNAL_SERVER_ERROR