Correctly abort failed grammar requests and improve abort handling (#6803)

This commit is contained in:
Lianmin Zheng
2025-06-01 19:00:07 -07:00
committed by GitHub
parent 6a47b73024
commit 20fd53b8f6
16 changed files with 199 additions and 142 deletions

View File

@@ -402,6 +402,12 @@ class TokenizerMetricsCollector:
labelnames=labels.keys(),
)
self.num_aborted_requests_total = Counter(
name="sglang:num_aborted_requests",
documentation="Number of requests aborted.",
labelnames=labels.keys(),
)
if bucket_time_to_first_token is None:
bucket_time_to_first_token = [
0.1,
@@ -533,3 +539,6 @@ class TokenizerMetricsCollector:
if adjusted_interval <= bound:
his._buckets[i].inc(num_new_tokens)
break
def observe_one_aborted_request(self):
    """Add one to the aborted-requests counter for this collector's labels."""
    # Look up the child counter keyed by this collector's label values,
    # then bump it by a single aborted request.
    aborted_counter = self.num_aborted_requests_total.labels(**self.labels)
    aborted_counter.inc(1)