[Feature] Sglang Tracing: Fine-Grained Tracking for Request Latency - Part 1 (#9962)

Signed-off-by: Feng Su <sufeng@linux.alibaba.com>
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Peng Wang <rocking@linux.alibaba.com>
This commit is contained in:
Feng Su
2025-09-15 02:08:02 +08:00
committed by GitHub
parent 165abeebca
commit 4c21b09074
12 changed files with 1129 additions and 0 deletions

View File

@@ -33,6 +33,8 @@ import zmq
import zmq.asyncio
from PIL.Image import Image
from sglang.srt.tracing.trace import process_tracing_init, trace_set_thread_info
# Work around a Python threading bug: make threading._register_atexit a no-op
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
@@ -138,6 +140,12 @@ class Engine(EngineBase):
context, zmq.DEALER, self.port_args.rpc_ipc_name, True
)
if server_args.enable_trace:
process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
if server_args.disaggregation_mode == "null":
thread_label = "Tokenizer"
trace_set_thread_info(thread_label)
def generate(
self,
# The input prompt. It can be a single prompt or a batch of prompts.

View File

@@ -31,6 +31,8 @@ from typing import Any, AsyncIterator, Callable, Dict, List, Optional
import setproctitle
from sglang.srt.tracing.trace import process_tracing_init, trace_set_thread_info
# Work around a Python threading bug: make threading._register_atexit a no-op
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
@@ -179,6 +181,13 @@ async def init_multi_tokenizer() -> ServerArgs:
scheduler_info=scheduler_info,
)
)
if server_args.enable_trace:
process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
if server_args.disaggregation_mode == "null":
thread_label = f"MultiTokenizer-{tokenizer_manager.worker_id}"
trace_set_thread_info(thread_label)
return server_args
@@ -1203,6 +1212,12 @@ def launch_server(
server_args=server_args,
)
if server_args.enable_trace:
process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
if server_args.disaggregation_mode == "null":
thread_label = "Tokenizer"
trace_set_thread_info(thread_label)
set_global_state(
_GlobalState(
tokenizer_manager=tokenizer_manager,