[router] Basic OAI Response api (#10346)

This commit is contained in:
Keyang Ru
2025-09-11 20:56:17 -07:00
committed by GitHub
parent 27778010fc
commit a23bdeaf04
9 changed files with 245 additions and 5 deletions

View File

@@ -991,6 +991,91 @@ mod router_policy_tests {
}
}
#[cfg(test)]
mod responses_endpoint_tests {
    use super::*;

    /// Builds a JSON `POST /v1/responses` request whose `stream` flag is set
    /// to the given value; the other payload fields are fixed.
    fn responses_request(stream: bool) -> Request<Body> {
        let payload = json!({
            "input": "Hello Responses API",
            "model": "mock-model",
            "stream": stream
        });

        Request::builder()
            .method("POST")
            .uri("/v1/responses")
            .header(CONTENT_TYPE, "application/json")
            .body(Body::from(payload.to_string()))
            .unwrap()
    }

    /// A non-streaming request must return 200 with a JSON body whose
    /// `object` is `"response"` and whose `status` is `"completed"`.
    #[tokio::test]
    async fn test_v1_responses_non_streaming() {
        let workers = vec![MockWorkerConfig {
            port: 18950,
            worker_type: WorkerType::Regular,
            health_status: HealthStatus::Healthy,
            response_delay_ms: 0,
            fail_rate: 0.0,
        }];
        let ctx = TestContext::new(workers).await;
        let app = ctx.create_app().await;

        let resp = app
            .clone()
            .oneshot(responses_request(false))
            .await
            .unwrap();
        assert_eq!(resp.status(), StatusCode::OK);

        let raw = axum::body::to_bytes(resp.into_body(), usize::MAX)
            .await
            .unwrap();
        let parsed = serde_json::from_slice::<serde_json::Value>(&raw).unwrap();
        assert_eq!(parsed["object"], "response");
        assert_eq!(parsed["status"], "completed");

        ctx.shutdown().await;
    }

    /// A streaming request must return 200 with an SSE content type.
    #[tokio::test]
    async fn test_v1_responses_streaming() {
        let workers = vec![MockWorkerConfig {
            port: 18951,
            worker_type: WorkerType::Regular,
            health_status: HealthStatus::Healthy,
            response_delay_ms: 0,
            fail_rate: 0.0,
        }];
        let ctx = TestContext::new(workers).await;
        let app = ctx.create_app().await;

        let resp = app
            .clone()
            .oneshot(responses_request(true))
            .await
            .unwrap();
        assert_eq!(resp.status(), StatusCode::OK);

        // Only the response headers are inspected: a missing or non-UTF-8
        // content-type counts as "not SSE".
        let is_event_stream = resp
            .headers()
            .get(CONTENT_TYPE)
            .and_then(|value| value.to_str().ok())
            .map_or(false, |value| value.contains("text/event-stream"));
        assert!(is_event_stream);

        // The stream body is intentionally not consumed in this test harness.
        ctx.shutdown().await;
    }
}
#[cfg(test)]
mod error_tests {
use super::*;

View File

@@ -81,6 +81,7 @@ impl MockWorker {
.route("/generate", post(generate_handler))
.route("/v1/chat/completions", post(chat_completions_handler))
.route("/v1/completions", post(completions_handler))
.route("/v1/responses", post(responses_handler))
.route("/flush_cache", post(flush_cache_handler))
.route("/v1/models", get(v1_models_handler))
.with_state(config);
@@ -548,6 +549,91 @@ async fn completions_handler(
}
}
/// Mock handler for `POST /v1/responses`.
///
/// Behavior, in order:
/// 1. If `should_fail` reports a failure for the current worker config, replies
///    with HTTP 500 and an `internal_error` JSON body.
/// 2. If the config's `response_delay_ms` is non-zero, sleeps for that long.
/// 3. Reads the boolean `stream` field from the JSON payload (missing or
///    non-boolean is treated as `false`):
///    - `true`: replies with an SSE stream carrying one JSON chunk
///      (`"status": "in_progress"`) followed by a `[DONE]` event.
///    - `false`: replies with a single JSON body (`"status": "completed"`)
///      including fixed `usage` counters.
///
/// Each response gets a fresh `resp-<uuid>` id and a `created_at` timestamp in
/// seconds since the Unix epoch.
async fn responses_handler(
    State(config): State<Arc<RwLock<MockWorkerConfig>>>,
    Json(payload): Json<serde_json::Value>,
) -> Response {
    // Hold the read guard only long enough to run the failure check and copy
    // the delay out. Keeping it alive across the sleep below (or for the rest
    // of the handler) would block any writer updating the mock's config for
    // the whole simulated delay.
    let response_delay_ms = {
        let config = config.read().await;

        if should_fail(&config).await {
            return (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({
                    "error": {
                        "message": "Random failure for testing",
                        "type": "internal_error",
                        "code": "internal_error"
                    }
                })),
            )
                .into_response();
        }

        config.response_delay_ms
    };

    if response_delay_ms > 0 {
        tokio::time::sleep(tokio::time::Duration::from_millis(response_delay_ms)).await;
    }

    let is_stream = payload
        .get("stream")
        .and_then(|v| v.as_bool())
        .unwrap_or(false);

    // Panics only if the system clock reads earlier than the Unix epoch.
    let timestamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs() as i64;

    if is_stream {
        let request_id = format!("resp-{}", Uuid::new_v4());

        // One data chunk, then the `[DONE]` sentinel event.
        let stream = stream::once(async move {
            let chunk = json!({
                "id": request_id,
                "object": "response",
                "created_at": timestamp,
                "model": "mock-model",
                "status": "in_progress",
                "output": [{
                    "type": "message",
                    "role": "assistant",
                    "content": [{
                        "type": "output_text",
                        "text": "This is a mock responses streamed output."
                    }]
                }]
            });
            Ok::<_, Infallible>(Event::default().data(chunk.to_string()))
        })
        .chain(stream::once(async { Ok(Event::default().data("[DONE]")) }));

        Sse::new(stream)
            .keep_alive(KeepAlive::default())
            .into_response()
    } else {
        Json(json!({
            "id": format!("resp-{}", Uuid::new_v4()),
            "object": "response",
            "created_at": timestamp,
            "model": "mock-model",
            "output": [{
                "type": "message",
                "role": "assistant",
                "content": [{
                    "type": "output_text",
                    "text": "This is a mock responses output."
                }]
            }],
            "status": "completed",
            "usage": {
                "input_tokens": 10,
                "output_tokens": 5,
                "total_tokens": 15
            }
        }))
        .into_response()
    }
}
async fn flush_cache_handler(State(config): State<Arc<RwLock<MockWorkerConfig>>>) -> Response {
let config = config.read().await;