sglang/sgl-router/src/protocols/worker_spec.rs

//! Worker management API specifications
//!
//! Defines the request/response structures for worker management endpoints

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

/// Worker configuration for API requests.
///
/// Only `url` has neither an `Option` type nor a serde default. `Option`
/// fields are omitted from serialized output when `None`, `labels` is omitted
/// when empty, and the health-check / connection settings fall back to the
/// private `default_*` functions in this file when they are missing from the
/// input being deserialized.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkerConfigRequest {
    /// Worker URL (required)
    pub url: String,

    /// Worker API key (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub api_key: Option<String>,

    /// Model ID (optional, will query from server if not provided)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_id: Option<String>,

    /// Worker priority (optional, default: 50, higher = preferred)
    // NOTE(review): the default of 50 is not applied in this file — confirm
    // where a `None` priority is resolved.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub priority: Option<u32>,

    /// Worker cost factor (optional, default: 1.0)
    // NOTE(review): the default of 1.0 is not applied in this file — confirm
    // where a `None` cost is resolved.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost: Option<f32>,

    /// Worker type (optional: "regular", "prefill", "decode")
    #[serde(skip_serializing_if = "Option::is_none")]
    pub worker_type: Option<String>,

    /// Bootstrap port for prefill workers (optional)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bootstrap_port: Option<u16>,

    // gRPC-specific configuration (optional, ignored in HTTP mode)
    /// Tokenizer path for gRPC mode
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tokenizer_path: Option<String>,

    /// Reasoning parser type for gRPC mode
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_parser: Option<String>,

    /// Tool parser type for gRPC mode
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_parser: Option<String>,

    /// Chat template for gRPC mode
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chat_template: Option<String>,

    /// Additional labels (optional); an absent field deserializes to an empty map
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub labels: HashMap<String, String>,

    /// Health check timeout in seconds (default: 30)
    #[serde(default = "default_health_check_timeout")]
    pub health_check_timeout_secs: u64,

    /// Health check interval in seconds (default: 60)
    #[serde(default = "default_health_check_interval")]
    pub health_check_interval_secs: u64,

    /// Number of successful health checks needed to mark worker as healthy (default: 2)
    #[serde(default = "default_health_success_threshold")]
    pub health_success_threshold: u32,

    /// Number of failed health checks before marking worker as unhealthy (default: 3)
    #[serde(default = "default_health_failure_threshold")]
    pub health_failure_threshold: u32,

    /// Maximum connection attempts during worker registration (default: 20)
    #[serde(default = "default_max_connection_attempts")]
    pub max_connection_attempts: u32,

    /// Enable data parallelism aware scheduling (default: false)
    #[serde(default)]
    pub dp_aware: bool,
}

// Default value functions for serde
/// Serde fallback for `health_check_timeout_secs`: 30 seconds.
fn default_health_check_timeout() -> u64 {
    const TIMEOUT_SECS: u64 = 30;
    TIMEOUT_SECS
}

/// Serde fallback for `health_check_interval_secs`: 60 seconds.
fn default_health_check_interval() -> u64 {
    const INTERVAL_SECS: u64 = 60;
    INTERVAL_SECS
}

/// Serde fallback for `health_success_threshold`: 2 successful checks.
fn default_health_success_threshold() -> u32 {
    const SUCCESSES_REQUIRED: u32 = 2;
    SUCCESSES_REQUIRED
}

/// Serde fallback for `health_failure_threshold`: 3 failed checks.
fn default_health_failure_threshold() -> u32 {
    const FAILURES_ALLOWED: u32 = 3;
    FAILURES_ALLOWED
}

/// Serde fallback for `max_connection_attempts`: 20 attempts.
fn default_max_connection_attempts() -> u32 {
    const MAX_ATTEMPTS: u32 = 20;
    MAX_ATTEMPTS
}

/// Worker information for API responses.
///
/// Serialize-only. `Option` fields are dropped from the output when `None`
/// and `metadata` is dropped when the map is empty; all other fields are
/// always written.
#[derive(Debug, Clone, Serialize)]
pub struct WorkerInfo {
    /// Worker unique identifier
    pub id: String,

    /// Worker URL
    pub url: String,

    /// Model ID this worker serves
    pub model_id: String,

    /// Worker priority
    pub priority: u32,

    /// Worker cost factor
    pub cost: f32,

    /// Worker type
    pub worker_type: String,

    /// Whether the worker is healthy
    pub is_healthy: bool,

    /// Current load on the worker
    pub load: usize,

    /// Connection mode (http or grpc)
    pub connection_mode: String,

    // gRPC-specific fields (None for HTTP workers)
    /// Tokenizer path (gRPC workers only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tokenizer_path: Option<String>,

    /// Reasoning parser type (gRPC workers only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_parser: Option<String>,

    /// Tool parser type (gRPC workers only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_parser: Option<String>,

    /// Chat template (gRPC workers only)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub chat_template: Option<String>,

    /// Bootstrap port for prefill workers
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bootstrap_port: Option<u16>,

    /// Additional metadata
    #[serde(skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, String>,

    /// Job status for async operations (if available)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub job_status: Option<JobStatus>,
}

/// Job status for async control plane operations.
///
/// Supports both serialization and deserialization. No field carries a
/// `skip_serializing_if` attribute, so `message` is still written out when it
/// is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobStatus {
    /// Kind of job this status describes
    // NOTE(review): the set of valid job type strings is not defined here — confirm with the producer.
    pub job_type: String,
    /// URL of the worker the job applies to
    pub worker_url: String,
    /// Current state of the job
    // NOTE(review): the set of valid status strings is not defined here — confirm with the producer.
    pub status: String,
    /// Optional free-form detail accompanying the status
    pub message: Option<String>,
    /// Time associated with this status
    // NOTE(review): unit and epoch are not specified in this file — confirm with the producer.
    pub timestamp: u64,
}

/// Worker list response.
///
/// Serialize-only payload bundling the per-worker entries with a count and
/// aggregate statistics.
#[derive(Debug, Clone, Serialize)]
pub struct WorkerListResponse {
    /// List of workers
    pub workers: Vec<WorkerInfo>,

    /// Total count
    // NOTE(review): presumably equals `workers.len()` — confirm against the code that builds this response.
    pub total: usize,

    /// Statistics
    pub stats: WorkerStats,
}

/// Worker statistics.
///
/// Serialize-only aggregate counters embedded in [`WorkerListResponse`].
#[derive(Debug, Clone, Serialize)]
pub struct WorkerStats {
    /// Number of workers counted
    pub total_workers: usize,
    /// Number of workers counted as healthy
    pub healthy_workers: usize,
    /// Number of models counted
    // NOTE(review): presumably distinct model IDs across workers — confirm with the aggregation code.
    pub total_models: usize,
    /// Aggregate load
    // NOTE(review): presumably the sum of per-worker `load` values — confirm with the aggregation code.
    pub total_load: usize,
    /// Worker counts broken down by worker type
    pub by_type: WorkerTypeStats,
}

/// Worker statistics by type.
///
/// Serialize-only; one counter per worker type.
#[derive(Debug, Clone, Serialize)]
pub struct WorkerTypeStats {
    /// Count of regular workers
    pub regular: usize,
    /// Count of prefill workers
    pub prefill: usize,
    /// Count of decode workers
    pub decode: usize,
}

/// Worker update request
#[derive(Debug, Clone, Deserialize)]
pub struct WorkerUpdateRequest {
    /// Update priority
    #[serde(skip_serializing_if = "Option::is_none")]
    pub priority: Option<u32>,

    /// Update cost
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost: Option<f32>,

    /// Update labels
    #[serde(skip_serializing_if = "Option::is_none")]
    pub labels: Option<HashMap<String, String>>,
}

/// Generic API response.
///
/// Serialize-only. `worker` is omitted from the output when it is `None`.
#[derive(Debug, Clone, Serialize)]
pub struct WorkerApiResponse {
    /// Whether the operation succeeded
    pub success: bool,
    /// Message accompanying the result
    pub message: String,

    /// Worker details, when the response includes them
    #[serde(skip_serializing_if = "Option::is_none")]
    pub worker: Option<WorkerInfo>,
}

/// Error response.
///
/// Serialize-only; both fields are always written.
#[derive(Debug, Clone, Serialize)]
pub struct WorkerErrorResponse {
    /// Error description
    pub error: String,
    /// Error code
    // NOTE(review): the set of code strings is not defined in this file — confirm with the handlers that build it.
    pub code: String,
}

/// Server info response from /get_server_info endpoint
#[derive(Debug, Clone, Deserialize)]
pub struct ServerInfo {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_id: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_path: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub priority: Option<u32>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost: Option<f32>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub worker_type: Option<String>,

    // gRPC-specific
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tokenizer_path: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_parser: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_parser: Option<String>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub chat_template: Option<String>,
}

/// Result from flush cache operations across workers.
///
/// Supports both serialization and deserialization; every field is always
/// present on the wire.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct FlushCacheResult {
    /// URLs of workers where cache flush succeeded
    pub successful: Vec<String>,
    /// URLs and error messages for workers where cache flush failed
    // NOTE(review): tuple order is presumably (worker URL, error message) — confirm with the code that fills it.
    pub failed: Vec<(String, String)>,
    /// Total number of workers attempted
    pub total_workers: usize,
    /// Number of HTTP workers (gRPC workers don't support flush cache)
    pub http_workers: usize,
    /// Human-readable summary message
    pub message: String,
}

/// Result from getting worker loads.
///
/// Supports both serialization and deserialization; every field is always
/// present on the wire.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkerLoadsResult {
    /// Worker URL and load pairs, one [`WorkerLoadInfo`] entry per worker
    pub loads: Vec<WorkerLoadInfo>,
    /// Total number of workers
    pub total_workers: usize,
    /// Number of workers with successful load fetches
    pub successful: usize,
    /// Number of workers with failed load fetches
    pub failed: usize,
}

/// Individual worker load information.
///
/// Supports both serialization and deserialization. `worker_type` is omitted
/// from serialized output when `None`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkerLoadInfo {
    /// Worker URL
    pub worker: String,
    /// Worker type (regular, prefill, decode)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub worker_type: Option<String>,
    /// Current load (-1 indicates failure to fetch)
    // The field is signed so that the -1 failure sentinel can be represented.
    pub load: isize,
}