[router]: Add Embedding routing logic (#10129)
Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com> Co-authored-by: Waël Boukhobza <wawa_wael@live.fr>
This commit is contained in:
@@ -143,6 +143,18 @@ pub fn init_metrics() {
|
||||
"Generate request duration"
|
||||
);
|
||||
|
||||
// Embedding request specific metrics
|
||||
describe_counter!("sgl_router_embeddings_total", "Total embedding requests");
|
||||
describe_histogram!(
|
||||
"sgl_router_embeddings_duration_seconds",
|
||||
"Embedding request duration"
|
||||
);
|
||||
describe_counter!(
|
||||
"sgl_router_embeddings_errors_total",
|
||||
"Embedding request errors"
|
||||
);
|
||||
describe_gauge!("sgl_router_embeddings_queue_size", "Embedding queue size");
|
||||
|
||||
// Running requests gauge for cache-aware policy
|
||||
describe_gauge!(
|
||||
"sgl_router_running_requests",
|
||||
@@ -440,6 +452,27 @@ impl RouterMetrics {
|
||||
histogram!("sgl_router_generate_duration_seconds").record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
// Embeddings metrics
|
||||
pub fn record_embeddings_request() {
|
||||
counter!("sgl_router_embeddings_total").increment(1);
|
||||
}
|
||||
|
||||
pub fn record_embeddings_duration(duration: Duration) {
|
||||
histogram!("sgl_router_embeddings_duration_seconds").record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
pub fn record_embeddings_error(error_type: &str) {
|
||||
counter!(
|
||||
"sgl_router_embeddings_errors_total",
|
||||
"error_type" => error_type.to_string()
|
||||
)
|
||||
.increment(1);
|
||||
}
|
||||
|
||||
pub fn set_embeddings_queue_size(size: usize) {
|
||||
gauge!("sgl_router_embeddings_queue_size").set(size as f64);
|
||||
}
|
||||
|
||||
// Running requests for cache-aware policy
|
||||
pub fn set_running_requests(worker: &str, count: usize) {
|
||||
gauge!("sgl_router_running_requests",
|
||||
|
||||
Reference in New Issue
Block a user