[router]: Add Embedding routing logic (#10129)

Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com>
Co-authored-by: Waël Boukhobza <wawa_wael@live.fr>
This commit is contained in:
Jintao Zhang
2025-09-15 09:44:35 +08:00
committed by GitHub
parent dcee42c200
commit f9ee6ae17a
17 changed files with 452 additions and 69 deletions

View File

@@ -143,6 +143,18 @@ pub fn init_metrics() {
"Generate request duration"
);
// Embedding request specific metrics
describe_counter!("sgl_router_embeddings_total", "Total embedding requests");
describe_histogram!(
"sgl_router_embeddings_duration_seconds",
"Embedding request duration"
);
describe_counter!(
"sgl_router_embeddings_errors_total",
"Embedding request errors"
);
describe_gauge!("sgl_router_embeddings_queue_size", "Embedding queue size");
// Running requests gauge for cache-aware policy
describe_gauge!(
"sgl_router_running_requests",
@@ -440,6 +452,27 @@ impl RouterMetrics {
histogram!("sgl_router_generate_duration_seconds").record(duration.as_secs_f64());
}
// Embeddings metrics
/// Counts one embedding request by incrementing the
/// `sgl_router_embeddings_total` counter by 1.
pub fn record_embeddings_request() {
    let total_requests = counter!("sgl_router_embeddings_total");
    total_requests.increment(1);
}
/// Records how long an embedding request took.
///
/// `duration` is converted to fractional seconds and recorded in the
/// `sgl_router_embeddings_duration_seconds` histogram.
pub fn record_embeddings_duration(duration: Duration) {
    let latency_histogram = histogram!("sgl_router_embeddings_duration_seconds");
    latency_histogram.record(duration.as_secs_f64());
}
/// Counts one embedding request error.
///
/// Increments the `sgl_router_embeddings_errors_total` counter by 1, with
/// `error_type` attached as the value of the `error_type` label.
pub fn record_embeddings_error(error_type: &str) {
    // The label value must be owned, so copy the borrowed string.
    let label_value = error_type.to_owned();
    let error_counter = counter!(
        "sgl_router_embeddings_errors_total",
        "error_type" => label_value
    );
    error_counter.increment(1);
}
/// Publishes the current embedding queue size.
///
/// Sets the `sgl_router_embeddings_queue_size` gauge to `size`, cast to
/// `f64`.
pub fn set_embeddings_queue_size(size: usize) {
    let queue_gauge = gauge!("sgl_router_embeddings_queue_size");
    queue_gauge.set(size as f64);
}
// Running requests for cache-aware policy
pub fn set_running_requests(worker: &str, count: usize) {
gauge!("sgl_router_running_requests",