[router] Replace print with logger (#2183)

This commit is contained in:
Byron Hsu
2024-11-25 13:36:02 -08:00
committed by GitHub
parent e1e595d702
commit 4d62bca542
10 changed files with 249 additions and 47 deletions

View File

@@ -22,6 +22,7 @@ struct Router {
balance_rel_threshold: f32,
eviction_interval_secs: u64,
max_tree_size: usize,
verbose: bool,
}
#[pymethods]
@@ -36,7 +37,8 @@ impl Router {
balance_abs_threshold = 32,
balance_rel_threshold = 1.0001,
eviction_interval_secs = 60,
max_tree_size = 2usize.pow(24)
max_tree_size = 2usize.pow(24),
verbose = false
))]
fn new(
worker_urls: Vec<String>,
@@ -48,6 +50,7 @@ impl Router {
balance_rel_threshold: f32,
eviction_interval_secs: u64,
max_tree_size: usize,
verbose: bool,
) -> PyResult<Self> {
Ok(Router {
host,
@@ -59,14 +62,11 @@ impl Router {
balance_rel_threshold,
eviction_interval_secs,
max_tree_size,
verbose,
})
}
fn start(&self) -> PyResult<()> {
let host = self.host.clone();
let port = self.port;
let worker_urls = self.worker_urls.clone();
let policy_config = match &self.policy {
PolicyType::Random => router::PolicyConfig::RandomConfig,
PolicyType::RoundRobin => router::PolicyConfig::RoundRobinConfig,
@@ -80,9 +80,15 @@ impl Router {
};
actix_web::rt::System::new().block_on(async move {
server::startup(host, port, worker_urls, policy_config)
.await
.unwrap();
server::startup(server::ServerConfig {
host: self.host.clone(),
port: self.port,
worker_urls: self.worker_urls.clone(),
policy_config,
verbose: self.verbose,
})
.await
.unwrap();
});
Ok(())

View File

@@ -1,7 +1,7 @@
use clap::Parser;
use clap::ValueEnum;
use sglang_router_rs::{router::PolicyConfig, server};
use sglang_router_rs::{router::PolicyConfig, server, server::ServerConfig};
#[derive(Debug, Clone, ValueEnum)]
pub enum PolicyType {
@@ -89,6 +89,9 @@ struct Args {
help = "Maximum size of the approximation tree for cache-aware routing. Default: 2^24"
)]
max_tree_size: usize,
#[arg(long, default_value_t = false, help = "Enable verbose logging")]
verbose: bool,
}
impl Args {
@@ -111,5 +114,12 @@ impl Args {
async fn main() -> std::io::Result<()> {
let args = Args::parse();
let policy_config = args.get_policy_config();
server::startup(args.host, args.port, args.worker_urls, policy_config).await
server::startup(ServerConfig {
host: args.host,
port: args.port,
worker_urls: args.worker_urls,
policy_config,
verbose: args.verbose,
})
.await
}

View File

@@ -3,6 +3,7 @@ use actix_web::http::header::{HeaderValue, CONTENT_TYPE};
use actix_web::{HttpRequest, HttpResponse};
use bytes::Bytes;
use futures_util::{Stream, StreamExt, TryStreamExt};
use log::{debug, info};
use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::Hash;
@@ -171,11 +172,11 @@ impl Router {
// Print the process queue
let locked_processed_queue = processed_queue_clone.lock().unwrap();
println!("Processed Queue: {:?}", locked_processed_queue);
info!("Processed Queue: {:?}", locked_processed_queue);
// Print the running queue
let locked_running_queue = running_queue_clone.lock().unwrap();
println!("Running Queue: {:?}", locked_running_queue);
info!("Running Queue: {:?}", locked_running_queue);
}
});
@@ -266,7 +267,7 @@ impl Router {
let selected_url = if is_imbalanced {
// Log load balancing trigger and current queue state
println!(
info!(
"Load balancing triggered due to workload imbalance:\n\
Max load: {}, Min load: {}\n\
Current running queue: {:?}",
@@ -368,8 +369,7 @@ impl Router {
let mut locked_queue = running_queue.lock().unwrap();
let count = locked_queue.get_mut(&worker_url).unwrap();
*count = count.saturating_sub(1);
// print
// println!("streaming is done!!")
debug!("streaming is done!!")
}
}),
)

View File

@@ -2,6 +2,9 @@ use crate::router::PolicyConfig;
use crate::router::Router;
use actix_web::{get, post, web, App, HttpRequest, HttpResponse, HttpServer, Responder};
use bytes::Bytes;
use env_logger::Builder;
use log::{debug, info, LevelFilter};
use std::io::Write;
#[derive(Debug)]
pub struct AppState {
@@ -125,23 +128,49 @@ async fn v1_completions(
.await
}
pub async fn startup(
host: String,
port: u16,
worker_urls: Vec<String>,
policy_config: PolicyConfig,
) -> std::io::Result<()> {
println!("Starting server on {}:{}", host, port);
println!("Worker URLs: {:?}", worker_urls);
println!("Policy Config: {:?}", policy_config);
/// Settings consumed by [`startup`] to configure logging and launch the HTTP server.
///
/// Grouping the values in one struct keeps the `startup` signature stable when
/// new options (such as `verbose`) are added.
#[derive(Debug)]
pub struct ServerConfig {
    /// Host name or address the HTTP server binds to.
    pub host: String,
    /// TCP port the HTTP server binds to.
    pub port: u16,
    /// Worker URLs handed to the application state when the server starts.
    pub worker_urls: Vec<String>,
    /// Routing policy configuration handed to the application state.
    pub policy_config: PolicyConfig,
    /// When `true`, the logger is filtered at `Debug` level; otherwise at `Info`.
    pub verbose: bool,
}
pub async fn startup(config: ServerConfig) -> std::io::Result<()> {
Builder::new()
.format(|buf, record| {
use chrono::Local;
writeln!(
buf,
"[Router (Rust)] {} - {} - {}",
Local::now().format("%Y-%m-%d %H:%M:%S"),
record.level(),
record.args()
)
})
.filter(
None,
if config.verbose {
LevelFilter::Debug
} else {
LevelFilter::Info
},
)
.init();
info!("Starting server on {}:{}", config.host, config.port);
info!("Worker URLs: {:?}", config.worker_urls);
info!("Policy Config: {:?}", config.policy_config);
// Create client once with configuration
let client = reqwest::Client::builder()
.build()
.expect("Failed to create HTTP client");
// Store both worker_urls and client in AppState
let app_state = web::Data::new(AppState::new(worker_urls, client, policy_config));
let app_state = web::Data::new(AppState::new(
config.worker_urls,
client,
config.policy_config,
));
HttpServer::new(move || {
App::new()
@@ -155,7 +184,7 @@ pub async fn startup(
.service(health_generate)
.service(get_server_info)
})
.bind((host, port))?
.bind((config.host, config.port))?
.run()
.await
}

View File

@@ -1,5 +1,6 @@
use dashmap::mapref::entry::Entry;
use dashmap::DashMap;
use log::info;
use rand::distributions::{Alphanumeric, DistString};
use rand::thread_rng;
use std::cmp::min;
@@ -434,9 +435,9 @@ impl Tree {
}
}
println!("Before eviction - Used size per tenant:");
info!("Before eviction - Used size per tenant:");
for (tenant, size) in &used_size_per_tenant {
println!("Tenant: {}, Size: {}", tenant, size);
info!("Tenant: {}, Size: {}", tenant, size);
}
// Process eviction
@@ -490,9 +491,9 @@ impl Tree {
}
}
println!("\nAfter eviction - Used size per tenant:");
info!("After eviction - Used size per tenant:");
for (tenant, size) in &used_size_per_tenant {
println!("Tenant: {}, Size: {}", tenant, size);
info!("Tenant: {}, Size: {}", tenant, size);
}
}