2024-11-04 10:56:52 -08:00
use clap ::Parser ;
2024-11-10 21:57:32 -08:00
use clap ::ValueEnum ;
2024-10-28 09:49:48 -07:00
2024-11-25 13:36:02 -08:00
use sglang_router_rs ::{ router ::PolicyConfig , server , server ::ServerConfig } ;
2024-11-10 21:57:32 -08:00
#[ derive(Debug, Clone, ValueEnum) ]
pub enum PolicyType {
Random ,
RoundRobin ,
2024-11-23 08:34:48 -08:00
CacheAware ,
2024-11-10 21:57:32 -08:00
}
2024-10-28 09:49:48 -07:00
#[ derive(Parser, Debug) ]
#[ command(author, version, about, long_about = None) ]
struct Args {
2024-11-10 21:57:32 -08:00
#[ arg(
long ,
default_value = " 127.0.0.1 " ,
2024-11-23 08:34:48 -08:00
help = " Host address to bind the router server to. Default: 127.0.0.1 "
2024-11-10 21:57:32 -08:00
) ]
2024-10-28 09:49:48 -07:00
host : String ,
2024-11-23 08:34:48 -08:00
#[ arg(
long ,
default_value_t = 3001 ,
help = " Port number to bind the router server to. Default: 3001 "
) ]
2024-10-28 09:49:48 -07:00
port : u16 ,
2024-11-10 21:57:32 -08:00
#[ arg(
long ,
value_delimiter = ',' ,
2024-11-23 08:34:48 -08:00
help = " Comma-separated list of worker URLs that will handle the requests. Each URL should include the protocol, host, and port (e.g., http://worker1:8000,http://worker2:8000) "
2024-11-10 21:57:32 -08:00
) ]
2024-10-28 09:49:48 -07:00
worker_urls : Vec < String > ,
2024-11-10 21:57:32 -08:00
#[ arg(
long ,
2024-11-23 08:34:48 -08:00
default_value_t = PolicyType ::CacheAware ,
2024-11-10 21:57:32 -08:00
value_enum ,
2024-11-23 08:34:48 -08:00
help = " Load balancing policy to use for request distribution: \n \
- random : Randomly select workers \ n \
- round_robin : Distribute requests in round - robin fashion \ n \
2024-11-24 23:17:11 -08:00
- cache_aware : Distribute requests based on cache state and load balance \ n "
2024-11-10 21:57:32 -08:00
) ]
policy : PolicyType ,
#[ arg(
long ,
2024-11-23 08:34:48 -08:00
default_value_t = 0.5 ,
requires = " policy " ,
required_if_eq ( " policy " , " cache_aware " ) ,
help = " Cache threshold (0.0-1.0) for cache-aware routing. Routes to cached worker if the match rate exceeds threshold, otherwise routes to the worker with the smallest tree. Default: 0.5 "
) ]
cache_threshold : f32 ,
#[ arg(
long ,
2024-11-24 23:17:11 -08:00
default_value_t = 32 ,
2024-11-23 08:34:48 -08:00
requires = " policy " ,
required_if_eq ( " policy " , " cache_aware " ) ,
2024-11-24 23:17:11 -08:00
help = " Load balancing is triggered when (max_load - min_load) > abs_threshold AND max_load > min_load * rel_threshold. Otherwise, use cache aware. Default: 32 "
2024-11-23 08:34:48 -08:00
) ]
2024-11-24 23:17:11 -08:00
balance_abs_threshold : usize ,
#[ arg(
long ,
default_value_t = 1.0001 ,
requires = " policy " ,
required_if_eq ( " policy " , " cache_aware " ) ,
help = " Load balancing is triggered when (max_load - min_load) > abs_threshold AND max_load > min_load * rel_threshold. Otherwise, use cache aware. Default: 1.0001 "
) ]
balance_rel_threshold : f32 ,
2024-11-23 08:34:48 -08:00
#[ arg(
long ,
default_value_t = 60 ,
2024-11-10 21:57:32 -08:00
requires = " policy " ,
2024-11-23 08:34:48 -08:00
required_if_eq ( " policy " , " cache_aware " ) ,
help = " Interval in seconds between cache eviction operations in cache-aware routing. Default: 60 "
2024-11-10 21:57:32 -08:00
) ]
2024-11-23 08:34:48 -08:00
eviction_interval_secs : u64 ,
2024-11-10 21:57:32 -08:00
#[ arg(
long ,
2024-11-23 08:34:48 -08:00
default_value_t = 2 usize . pow ( 24 ) ,
2024-11-10 21:57:32 -08:00
requires = " policy " ,
2024-11-23 08:34:48 -08:00
required_if_eq ( " policy " , " cache_aware " ) ,
help = " Maximum size of the approximation tree for cache-aware routing. Default: 2^24 "
2024-11-10 21:57:32 -08:00
) ]
2024-11-23 08:34:48 -08:00
max_tree_size : usize ,
2024-11-25 13:36:02 -08:00
#[ arg(long, default_value_t = false, help = " Enable verbose logging " ) ]
verbose : bool ,
2024-11-10 21:57:32 -08:00
}
impl Args {
fn get_policy_config ( & self ) -> PolicyConfig {
match self . policy {
PolicyType ::Random = > PolicyConfig ::RandomConfig ,
PolicyType ::RoundRobin = > PolicyConfig ::RoundRobinConfig ,
2024-11-23 08:34:48 -08:00
PolicyType ::CacheAware = > PolicyConfig ::CacheAwareConfig {
cache_threshold : self . cache_threshold ,
2024-11-24 23:17:11 -08:00
balance_abs_threshold : self . balance_abs_threshold ,
balance_rel_threshold : self . balance_rel_threshold ,
2024-11-23 08:34:48 -08:00
eviction_interval_secs : self . eviction_interval_secs ,
max_tree_size : self . max_tree_size ,
2024-11-10 21:57:32 -08:00
} ,
}
}
2024-10-28 09:49:48 -07:00
}
#[ actix_web::main ]
async fn main ( ) -> std ::io ::Result < ( ) > {
let args = Args ::parse ( ) ;
2024-11-10 21:57:32 -08:00
let policy_config = args . get_policy_config ( ) ;
2024-11-25 13:36:02 -08:00
server ::startup ( ServerConfig {
host : args . host ,
port : args . port ,
worker_urls : args . worker_urls ,
policy_config ,
verbose : args . verbose ,
} )
. await
2024-11-04 10:56:52 -08:00
}