[router] add get server info and get model info in grpc server (#11303)

This commit is contained in:
Simo Lin
2025-10-07 11:36:52 -04:00
committed by GitHub
parent 0958a39704
commit 2fcd56eaf6
7 changed files with 393 additions and 3 deletions

View File

@@ -97,6 +97,30 @@ impl SglangSchedulerClient {
Ok(())
}
/// Query the scheduler for its model information.
///
/// Sends an empty `GetModelInfoRequest` through the underlying gRPC client
/// and returns the payload of the reply with the transport wrapper removed.
///
/// # Errors
///
/// Propagates (boxed) whatever error the underlying `get_model_info` RPC
/// call yields.
pub async fn get_model_info(
    &mut self,
) -> Result<proto::GetModelInfoResponse, Box<dyn std::error::Error + Send + Sync>> {
    debug!("Requesting model info");

    // Build the request inline and unwrap the reply in one chain; any RPC
    // failure short-circuits through `?` before the second log line.
    let model_info = self
        .client
        .get_model_info(Request::new(proto::GetModelInfoRequest {}))
        .await?
        .into_inner();

    debug!("Model info response received");
    Ok(model_info)
}
/// Query the scheduler for its server information.
///
/// Sends an empty `GetServerInfoRequest` through the underlying gRPC client
/// and returns the payload of the reply with the transport wrapper removed.
///
/// # Errors
///
/// Propagates (boxed) whatever error the underlying `get_server_info` RPC
/// call yields.
pub async fn get_server_info(
    &mut self,
) -> Result<proto::GetServerInfoResponse, Box<dyn std::error::Error + Send + Sync>> {
    debug!("Requesting server info");

    // The request carries no fields, so it is constructed in place.
    let reply = self
        .client
        .get_server_info(Request::new(proto::GetServerInfoRequest {}))
        .await?;

    debug!("Server info response received");
    let server_info = reply.into_inner();
    Ok(server_info)
}
/// Build a single SGLang GenerateRequest from OpenAI ChatCompletionRequest
pub fn build_generate_request(
&self,

View File

@@ -20,6 +20,12 @@ service SglangScheduler {
// Abort a running request
rpc Abort(AbortRequest) returns (AbortResponse);
// Get model information
rpc GetModelInfo(GetModelInfoRequest) returns (GetModelInfoResponse);
// Get server information
rpc GetServerInfo(GetServerInfoRequest) returns (GetServerInfoResponse);
}
// =====================
@@ -401,3 +407,56 @@ message SetInternalStateResponse {
bool success = 1;
string message = 2;
}
// =====================
// Model and Server Info
// =====================
// Get model information
// Request message for the GetModelInfo RPC. It declares no fields, so the
// call takes no parameters.
message GetModelInfoRequest {}
// Response message for the GetModelInfo RPC.
// NOTE(review): the per-field notes below are inferred from field names only;
// the server code that populates this message is not visible here — confirm
// against the implementation before relying on them.
message GetModelInfoResponse {
string model_path = 1;
string tokenizer_path = 2;
// presumably true for generative models and false otherwise — TODO confirm
bool is_generation = 3;
string preferred_sampling_params = 4; // JSON string or empty
string weight_version = 5;
string served_model_name = 6;
// NOTE(review): relationship to max_req_input_len (field 14) is not stated
// in this file — confirm which limit clients should enforce.
int32 max_context_length = 7;
int32 vocab_size = 8;
bool supports_vision = 9;
string model_type = 10;
// Repeated field: zero or more end-of-sequence token ids may be reported.
repeated int32 eos_token_ids = 11;
// NOTE(review): pad_token_id and bos_token_id are plain int32 fields. If this
// file uses proto3 syntax (not visible here), an unset value reads as 0 and
// cannot be distinguished from a real token id 0 — confirm how the server
// signals "no such token".
int32 pad_token_id = 12;
int32 bos_token_id = 13;
int32 max_req_input_len = 14;
}
// Get server information
// Request message for the GetServerInfo RPC. It declares no fields, so the
// call takes no parameters.
message GetServerInfoRequest {}
// Response message for the GetServerInfo RPC.
// Uses the well-known types google.protobuf.Struct and
// google.protobuf.Timestamp; presumably the matching imports are declared at
// the top of this file (not visible here) — verify.
message GetServerInfoResponse {
// Server configuration (as structured data)
google.protobuf.Struct server_args = 1;
// Scheduler metrics (from scheduler initialization)
google.protobuf.Struct scheduler_info = 2;
// Runtime state
int32 active_requests = 3;
bool is_paused = 4;
// NOTE(review): unit and epoch are not stated; presumably seconds since the
// Unix epoch as a float — confirm against the server.
double last_receive_timestamp = 5;
double uptime_seconds = 6;
// Version info
string sglang_version = 7;
// Server metadata
string server_type = 8; // "grpc"
google.protobuf.Timestamp start_time = 9;
// Note: internal_states is not provided in gRPC mode.
// Scheduler-side metrics (memory usage, throughput) require the
// bidirectional communicator infrastructure, which is not available in gRPC.
// Use HTTP /get_server_info if scheduler internal state is needed.
}