[router] add get server info and get model info in grpc server (#11303)
This commit is contained in:
@@ -97,6 +97,30 @@ impl SglangSchedulerClient {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get model information
|
||||
pub async fn get_model_info(
|
||||
&mut self,
|
||||
) -> Result<proto::GetModelInfoResponse, Box<dyn std::error::Error + Send + Sync>> {
|
||||
debug!("Requesting model info");
|
||||
let request = Request::new(proto::GetModelInfoRequest {});
|
||||
|
||||
let response = self.client.get_model_info(request).await?;
|
||||
debug!("Model info response received");
|
||||
Ok(response.into_inner())
|
||||
}
|
||||
|
||||
/// Get server information
|
||||
pub async fn get_server_info(
|
||||
&mut self,
|
||||
) -> Result<proto::GetServerInfoResponse, Box<dyn std::error::Error + Send + Sync>> {
|
||||
debug!("Requesting server info");
|
||||
let request = Request::new(proto::GetServerInfoRequest {});
|
||||
|
||||
let response = self.client.get_server_info(request).await?;
|
||||
debug!("Server info response received");
|
||||
Ok(response.into_inner())
|
||||
}
|
||||
|
||||
/// Build a single SGLang GenerateRequest from OpenAI ChatCompletionRequest
|
||||
pub fn build_generate_request(
|
||||
&self,
|
||||
|
||||
@@ -20,6 +20,12 @@ service SglangScheduler {
|
||||
// Abort a running request
|
||||
rpc Abort(AbortRequest) returns (AbortResponse);
|
||||
|
||||
// Get model information
|
||||
rpc GetModelInfo(GetModelInfoRequest) returns (GetModelInfoResponse);
|
||||
|
||||
// Get server information
|
||||
rpc GetServerInfo(GetServerInfoRequest) returns (GetServerInfoResponse);
|
||||
|
||||
}
|
||||
|
||||
// =====================
|
||||
@@ -401,3 +407,56 @@ message SetInternalStateResponse {
|
||||
bool success = 1;
|
||||
string message = 2;
|
||||
}
|
||||
|
||||
// =====================
|
||||
// Model and Server Info
|
||||
// =====================
|
||||
|
||||
// Get model information.
// Empty request: the scheduler responds with metadata for the model it
// currently has loaded, so no parameters are needed.
message GetModelInfoRequest {}
|
||||
|
||||
// Static metadata about the model loaded by the scheduler.
message GetModelInfoResponse {
  string model_path = 1;                // location/identifier of the model weights
  string tokenizer_path = 2;            // location/identifier of the tokenizer
  bool is_generation = 3;               // presumably true for generative (vs. embedding) models — confirm against server impl
  string preferred_sampling_params = 4; // JSON string or empty
  string weight_version = 5;
  string served_model_name = 6;         // name exposed to API clients
  int32 max_context_length = 7;
  int32 vocab_size = 8;
  bool supports_vision = 9;             // whether the model accepts image input
  string model_type = 10;
  repeated int32 eos_token_ids = 11;    // token ids treated as end-of-sequence
  int32 pad_token_id = 12;
  int32 bos_token_id = 13;
  int32 max_req_input_len = 14;         // max input tokens for a single request
}
|
||||
|
||||
// Get server information.
// Empty request: the scheduler reports its configuration and runtime
// state, so no parameters are needed.
message GetServerInfoRequest {}
|
||||
|
||||
// Configuration, runtime state, and version metadata for the scheduler
// process. Uses well-known types (google.protobuf.Struct / Timestamp),
// which must be imported at the top of this .proto file.
message GetServerInfoResponse {
  // Server configuration (as structured data)
  google.protobuf.Struct server_args = 1;

  // Scheduler metrics (from scheduler initialization)
  google.protobuf.Struct scheduler_info = 2;

  // Runtime state
  int32 active_requests = 3;          // number of requests currently in flight
  bool is_paused = 4;
  double last_receive_timestamp = 5;  // seconds; epoch basis set by server — confirm against server impl
  double uptime_seconds = 6;

  // Version info
  string sglang_version = 7;

  // Server metadata
  string server_type = 8; // "grpc"
  google.protobuf.Timestamp start_time = 9;

  // Note: internal_states not provided in gRPC mode.
  // Scheduler-side metrics (memory usage, throughput) require
  // bidirectional communicator infrastructure not available in gRPC.
  // Use HTTP /get_server_info if scheduler internal state is needed.
}
|
||||
|
||||
Reference in New Issue
Block a user