[router] Replace print with logger (#2183)
This commit is contained in:
@@ -22,6 +22,7 @@ struct Router {
|
||||
balance_rel_threshold: f32,
|
||||
eviction_interval_secs: u64,
|
||||
max_tree_size: usize,
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
@@ -36,7 +37,8 @@ impl Router {
|
||||
balance_abs_threshold = 32,
|
||||
balance_rel_threshold = 1.0001,
|
||||
eviction_interval_secs = 60,
|
||||
max_tree_size = 2usize.pow(24)
|
||||
max_tree_size = 2usize.pow(24),
|
||||
verbose = false
|
||||
))]
|
||||
fn new(
|
||||
worker_urls: Vec<String>,
|
||||
@@ -48,6 +50,7 @@ impl Router {
|
||||
balance_rel_threshold: f32,
|
||||
eviction_interval_secs: u64,
|
||||
max_tree_size: usize,
|
||||
verbose: bool,
|
||||
) -> PyResult<Self> {
|
||||
Ok(Router {
|
||||
host,
|
||||
@@ -59,14 +62,11 @@ impl Router {
|
||||
balance_rel_threshold,
|
||||
eviction_interval_secs,
|
||||
max_tree_size,
|
||||
verbose,
|
||||
})
|
||||
}
|
||||
|
||||
fn start(&self) -> PyResult<()> {
|
||||
let host = self.host.clone();
|
||||
let port = self.port;
|
||||
let worker_urls = self.worker_urls.clone();
|
||||
|
||||
let policy_config = match &self.policy {
|
||||
PolicyType::Random => router::PolicyConfig::RandomConfig,
|
||||
PolicyType::RoundRobin => router::PolicyConfig::RoundRobinConfig,
|
||||
@@ -80,9 +80,15 @@ impl Router {
|
||||
};
|
||||
|
||||
actix_web::rt::System::new().block_on(async move {
|
||||
server::startup(host, port, worker_urls, policy_config)
|
||||
.await
|
||||
.unwrap();
|
||||
server::startup(server::ServerConfig {
|
||||
host: self.host.clone(),
|
||||
port: self.port,
|
||||
worker_urls: self.worker_urls.clone(),
|
||||
policy_config,
|
||||
verbose: self.verbose,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
});
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use clap::Parser;
|
||||
use clap::ValueEnum;
|
||||
|
||||
use sglang_router_rs::{router::PolicyConfig, server};
|
||||
use sglang_router_rs::{router::PolicyConfig, server, server::ServerConfig};
|
||||
|
||||
#[derive(Debug, Clone, ValueEnum)]
|
||||
pub enum PolicyType {
|
||||
@@ -89,6 +89,9 @@ struct Args {
|
||||
help = "Maximum size of the approximation tree for cache-aware routing. Default: 2^24"
|
||||
)]
|
||||
max_tree_size: usize,
|
||||
|
||||
#[arg(long, default_value_t = false, help = "Enable verbose logging")]
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
impl Args {
|
||||
@@ -111,5 +114,12 @@ impl Args {
|
||||
async fn main() -> std::io::Result<()> {
|
||||
let args = Args::parse();
|
||||
let policy_config = args.get_policy_config();
|
||||
server::startup(args.host, args.port, args.worker_urls, policy_config).await
|
||||
server::startup(ServerConfig {
|
||||
host: args.host,
|
||||
port: args.port,
|
||||
worker_urls: args.worker_urls,
|
||||
policy_config,
|
||||
verbose: args.verbose,
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use actix_web::http::header::{HeaderValue, CONTENT_TYPE};
|
||||
use actix_web::{HttpRequest, HttpResponse};
|
||||
use bytes::Bytes;
|
||||
use futures_util::{Stream, StreamExt, TryStreamExt};
|
||||
use log::{debug, info};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
@@ -171,11 +172,11 @@ impl Router {
|
||||
|
||||
// Print the process queue
|
||||
let locked_processed_queue = processed_queue_clone.lock().unwrap();
|
||||
println!("Processed Queue: {:?}", locked_processed_queue);
|
||||
info!("Processed Queue: {:?}", locked_processed_queue);
|
||||
|
||||
// Print the running queue
|
||||
let locked_running_queue = running_queue_clone.lock().unwrap();
|
||||
println!("Running Queue: {:?}", locked_running_queue);
|
||||
info!("Running Queue: {:?}", locked_running_queue);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -266,7 +267,7 @@ impl Router {
|
||||
|
||||
let selected_url = if is_imbalanced {
|
||||
// Log load balancing trigger and current queue state
|
||||
println!(
|
||||
info!(
|
||||
"Load balancing triggered due to workload imbalance:\n\
|
||||
Max load: {}, Min load: {}\n\
|
||||
Current running queue: {:?}",
|
||||
@@ -368,8 +369,7 @@ impl Router {
|
||||
let mut locked_queue = running_queue.lock().unwrap();
|
||||
let count = locked_queue.get_mut(&worker_url).unwrap();
|
||||
*count = count.saturating_sub(1);
|
||||
// print
|
||||
// println!("streaming is done!!")
|
||||
debug!("streaming is done!!")
|
||||
}
|
||||
}),
|
||||
)
|
||||
|
||||
@@ -2,6 +2,9 @@ use crate::router::PolicyConfig;
|
||||
use crate::router::Router;
|
||||
use actix_web::{get, post, web, App, HttpRequest, HttpResponse, HttpServer, Responder};
|
||||
use bytes::Bytes;
|
||||
use env_logger::Builder;
|
||||
use log::{debug, info, LevelFilter};
|
||||
use std::io::Write;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AppState {
|
||||
@@ -125,23 +128,49 @@ async fn v1_completions(
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn startup(
|
||||
host: String,
|
||||
port: u16,
|
||||
worker_urls: Vec<String>,
|
||||
policy_config: PolicyConfig,
|
||||
) -> std::io::Result<()> {
|
||||
println!("Starting server on {}:{}", host, port);
|
||||
println!("Worker URLs: {:?}", worker_urls);
|
||||
println!("Policy Config: {:?}", policy_config);
|
||||
/// Settings consumed by `startup` to configure logging and launch the router's HTTP server.
///
/// * `host` / `port` - address pair the `HttpServer` is bound to.
/// * `worker_urls` - passed to `AppState::new` together with the shared HTTP client.
/// * `policy_config` - routing policy configuration, also passed to `AppState::new`.
/// * `verbose` - when `true` the logger filter is `LevelFilter::Debug`;
///   otherwise it is `LevelFilter::Info`.
pub struct ServerConfig {
|
||||
pub host: String,
|
||||
pub port: u16,
|
||||
pub worker_urls: Vec<String>,
|
||||
pub policy_config: PolicyConfig,
|
||||
pub verbose: bool,
|
||||
}
|
||||
|
||||
pub async fn startup(config: ServerConfig) -> std::io::Result<()> {
|
||||
Builder::new()
|
||||
.format(|buf, record| {
|
||||
use chrono::Local;
|
||||
writeln!(
|
||||
buf,
|
||||
"[Router (Rust)] {} - {} - {}",
|
||||
Local::now().format("%Y-%m-%d %H:%M:%S"),
|
||||
record.level(),
|
||||
record.args()
|
||||
)
|
||||
})
|
||||
.filter(
|
||||
None,
|
||||
if config.verbose {
|
||||
LevelFilter::Debug
|
||||
} else {
|
||||
LevelFilter::Info
|
||||
},
|
||||
)
|
||||
.init();
|
||||
|
||||
info!("Starting server on {}:{}", config.host, config.port);
|
||||
info!("Worker URLs: {:?}", config.worker_urls);
|
||||
info!("Policy Config: {:?}", config.policy_config);
|
||||
|
||||
// Create client once with configuration
|
||||
let client = reqwest::Client::builder()
|
||||
.build()
|
||||
.expect("Failed to create HTTP client");
|
||||
|
||||
// Store both worker_urls and client in AppState
|
||||
let app_state = web::Data::new(AppState::new(worker_urls, client, policy_config));
|
||||
let app_state = web::Data::new(AppState::new(
|
||||
config.worker_urls,
|
||||
client,
|
||||
config.policy_config,
|
||||
));
|
||||
|
||||
HttpServer::new(move || {
|
||||
App::new()
|
||||
@@ -155,7 +184,7 @@ pub async fn startup(
|
||||
.service(health_generate)
|
||||
.service(get_server_info)
|
||||
})
|
||||
.bind((host, port))?
|
||||
.bind((config.host, config.port))?
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use dashmap::mapref::entry::Entry;
|
||||
use dashmap::DashMap;
|
||||
use log::info;
|
||||
use rand::distributions::{Alphanumeric, DistString};
|
||||
use rand::thread_rng;
|
||||
use std::cmp::min;
|
||||
@@ -434,9 +435,9 @@ impl Tree {
|
||||
}
|
||||
}
|
||||
|
||||
println!("Before eviction - Used size per tenant:");
|
||||
info!("Before eviction - Used size per tenant:");
|
||||
for (tenant, size) in &used_size_per_tenant {
|
||||
println!("Tenant: {}, Size: {}", tenant, size);
|
||||
info!("Tenant: {}, Size: {}", tenant, size);
|
||||
}
|
||||
|
||||
// Process eviction
|
||||
@@ -490,9 +491,9 @@ impl Tree {
|
||||
}
|
||||
}
|
||||
|
||||
println!("\nAfter eviction - Used size per tenant:");
|
||||
info!("After eviction - Used size per tenant:");
|
||||
for (tenant, size) in &used_size_per_tenant {
|
||||
println!("Tenant: {}, Size: {}", tenant, size);
|
||||
info!("Tenant: {}, Size: {}", tenant, size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user