[router] add grpc router pd mode for chat and generate (#11140)
This commit is contained in:
@@ -31,6 +31,18 @@ pub trait Worker: Send + Sync + fmt::Debug {
|
||||
/// Get the worker's connection mode (HTTP or gRPC)
|
||||
fn connection_mode(&self) -> ConnectionMode;
|
||||
|
||||
/// Get the bootstrap hostname for PD mode
|
||||
/// Returns cached hostname parsed from URL at construction time
|
||||
fn bootstrap_host(&self) -> &str {
|
||||
&self.metadata().bootstrap_host
|
||||
}
|
||||
|
||||
/// Get the bootstrap port for PD mode
|
||||
/// Returns cached port from WorkerType::Prefill
|
||||
fn bootstrap_port(&self) -> Option<u16> {
|
||||
self.metadata().bootstrap_port
|
||||
}
|
||||
|
||||
/// Check if the worker is currently healthy
|
||||
fn is_healthy(&self) -> bool;
|
||||
|
||||
@@ -147,21 +159,6 @@ pub trait Worker: Send + Sync + fmt::Debug {
|
||||
true
|
||||
}
|
||||
|
||||
// TODO: - Enhanced Worker Discovery
|
||||
// The Worker trait should handle async discovery of metadata from the worker itself
|
||||
// rather than having service discovery or other components query /get_server_info.
|
||||
// This keeps service discovery decoupled from worker-specific APIs.
|
||||
//
|
||||
// Proposed additions:
|
||||
// - async fn discover_metadata(&mut self) -> Result<(), Error>
|
||||
// Query /get_server_info and populate metadata labels with model_id, priority, cost, etc.
|
||||
// - async fn validate_configuration(&self) -> Result<(), Error>
|
||||
// Ensure worker has required configuration for its mode (e.g., tokenizer for gRPC)
|
||||
// - Make worker creation async to allow metadata discovery during initialization
|
||||
//
|
||||
// This way service discovery just calls router.add_worker() and the worker
|
||||
// handles its own metadata discovery internally.
|
||||
|
||||
/// Get the model ID this worker serves
|
||||
fn model_id(&self) -> &str {
|
||||
self.metadata()
|
||||
@@ -325,6 +322,10 @@ pub struct WorkerMetadata {
|
||||
pub health_config: HealthConfig,
|
||||
/// API key
|
||||
pub api_key: Option<String>,
|
||||
/// Cached bootstrap hostname (parsed from URL at construction time)
|
||||
pub bootstrap_host: String,
|
||||
/// Cached bootstrap port (from WorkerType::Prefill)
|
||||
pub bootstrap_port: Option<u16>,
|
||||
}
|
||||
|
||||
/// Basic worker implementation
|
||||
|
||||
@@ -96,12 +96,29 @@ impl BasicWorkerBuilder {
|
||||
|
||||
/// Build the BasicWorker instance
|
||||
pub fn build(self) -> BasicWorker {
|
||||
use std::borrow::Cow;
|
||||
use std::sync::{
|
||||
atomic::{AtomicBool, AtomicUsize},
|
||||
Arc,
|
||||
};
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
|
||||
let url_to_parse = if self.url.contains("://") {
|
||||
Cow::from(&self.url)
|
||||
} else {
|
||||
Cow::from(format!("http://{}", self.url))
|
||||
};
|
||||
|
||||
let bootstrap_host = match url::Url::parse(&url_to_parse) {
|
||||
Ok(parsed) => parsed.host_str().unwrap_or("localhost").to_string(),
|
||||
Err(_) => "localhost".to_string(),
|
||||
};
|
||||
|
||||
let bootstrap_port = match self.worker_type {
|
||||
WorkerType::Prefill { bootstrap_port } => bootstrap_port,
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let metadata = WorkerMetadata {
|
||||
url: self.url.clone(),
|
||||
api_key: self.api_key,
|
||||
@@ -109,6 +126,8 @@ impl BasicWorkerBuilder {
|
||||
connection_mode: self.connection_mode,
|
||||
labels: self.labels,
|
||||
health_config: self.health_config,
|
||||
bootstrap_host,
|
||||
bootstrap_port,
|
||||
};
|
||||
|
||||
let grpc_client = Arc::new(RwLock::new(
|
||||
|
||||
Reference in New Issue
Block a user