[router] allow one router to support different model families and serving mode (#10244)
This commit is contained in:
@@ -3,3 +3,4 @@
|
||||
|
||||
pub mod spec;
|
||||
pub mod validation;
|
||||
pub mod worker_spec;
|
||||
|
||||
198
sgl-router/src/protocols/worker_spec.rs
Normal file
198
sgl-router/src/protocols/worker_spec.rs
Normal file
@@ -0,0 +1,198 @@
|
||||
//! Worker management API specifications
|
||||
//!
|
||||
//! Defines the request/response structures for worker management endpoints
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Worker configuration for API requests
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct WorkerConfigRequest {
|
||||
/// Worker URL (required)
|
||||
pub url: String,
|
||||
|
||||
/// Model ID (optional, will query from server if not provided)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub model_id: Option<String>,
|
||||
|
||||
/// Worker priority (optional, default: 50, higher = preferred)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub priority: Option<u32>,
|
||||
|
||||
/// Worker cost factor (optional, default: 1.0)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub cost: Option<f32>,
|
||||
|
||||
/// Worker type (optional: "regular", "prefill", "decode")
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub worker_type: Option<String>,
|
||||
|
||||
/// Bootstrap port for prefill workers (optional)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub bootstrap_port: Option<u16>,
|
||||
|
||||
// gRPC-specific configuration (optional, ignored in HTTP mode)
|
||||
/// Tokenizer path for gRPC mode
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tokenizer_path: Option<String>,
|
||||
|
||||
/// Reasoning parser type for gRPC mode
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reasoning_parser: Option<String>,
|
||||
|
||||
/// Tool parser type for gRPC mode
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tool_parser: Option<String>,
|
||||
|
||||
/// Chat template for gRPC mode
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub chat_template: Option<String>,
|
||||
|
||||
/// Additional labels (optional)
|
||||
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
|
||||
pub labels: HashMap<String, String>,
|
||||
}
|
||||
|
||||
/// Worker information for API responses
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct WorkerInfo {
|
||||
/// Worker unique identifier
|
||||
pub id: String,
|
||||
|
||||
/// Worker URL
|
||||
pub url: String,
|
||||
|
||||
/// Model ID this worker serves
|
||||
pub model_id: String,
|
||||
|
||||
/// Worker priority
|
||||
pub priority: u32,
|
||||
|
||||
/// Worker cost factor
|
||||
pub cost: f32,
|
||||
|
||||
/// Worker type
|
||||
pub worker_type: String,
|
||||
|
||||
/// Whether the worker is healthy
|
||||
pub is_healthy: bool,
|
||||
|
||||
/// Current load on the worker
|
||||
pub load: usize,
|
||||
|
||||
/// Connection mode (http or grpc)
|
||||
pub connection_mode: String,
|
||||
|
||||
// gRPC-specific fields (None for HTTP workers)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tokenizer_path: Option<String>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reasoning_parser: Option<String>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tool_parser: Option<String>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub chat_template: Option<String>,
|
||||
|
||||
/// Additional metadata
|
||||
#[serde(skip_serializing_if = "HashMap::is_empty")]
|
||||
pub metadata: HashMap<String, String>,
|
||||
}
|
||||
|
||||
/// Worker list response
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct WorkerListResponse {
|
||||
/// List of workers
|
||||
pub workers: Vec<WorkerInfo>,
|
||||
|
||||
/// Total count
|
||||
pub total: usize,
|
||||
|
||||
/// Statistics
|
||||
pub stats: WorkerStats,
|
||||
}
|
||||
|
||||
/// Worker statistics
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct WorkerStats {
|
||||
pub total_workers: usize,
|
||||
pub healthy_workers: usize,
|
||||
pub total_models: usize,
|
||||
pub total_load: usize,
|
||||
pub by_type: WorkerTypeStats,
|
||||
}
|
||||
|
||||
/// Worker statistics by type
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct WorkerTypeStats {
|
||||
pub regular: usize,
|
||||
pub prefill: usize,
|
||||
pub decode: usize,
|
||||
}
|
||||
|
||||
/// Worker update request
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct WorkerUpdateRequest {
|
||||
/// Update priority
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub priority: Option<u32>,
|
||||
|
||||
/// Update cost
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub cost: Option<f32>,
|
||||
|
||||
/// Update labels
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub labels: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
/// Generic API response
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct WorkerApiResponse {
|
||||
pub success: bool,
|
||||
pub message: String,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub worker: Option<WorkerInfo>,
|
||||
}
|
||||
|
||||
/// Error response
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct WorkerErrorResponse {
|
||||
pub error: String,
|
||||
pub code: String,
|
||||
}
|
||||
|
||||
/// Server info response from /get_server_info endpoint
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ServerInfo {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub model_id: Option<String>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub model_path: Option<String>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub priority: Option<u32>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub cost: Option<f32>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub worker_type: Option<String>,
|
||||
|
||||
// gRPC-specific
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tokenizer_path: Option<String>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reasoning_parser: Option<String>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tool_parser: Option<String>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub chat_template: Option<String>,
|
||||
}
|
||||
Reference in New Issue
Block a user