[Feature] Sglang Tracing: Fine-Grained Tracking for Request Latency - Part 1 (#9962)

Signed-off-by: Feng Su <sufeng@linux.alibaba.com>
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Peng Wang <rocking@linux.alibaba.com>
This commit is contained in:
Feng Su
2025-09-15 02:08:02 +08:00
committed by GitHub
parent 165abeebca
commit 4c21b09074
12 changed files with 1129 additions and 0 deletions

View File

@@ -33,6 +33,8 @@ import zmq
import zmq.asyncio
from PIL.Image import Image
from sglang.srt.tracing.trace import process_tracing_init, trace_set_thread_info
# Fix a bug of Python threading
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
@@ -138,6 +140,12 @@ class Engine(EngineBase):
context, zmq.DEALER, self.port_args.rpc_ipc_name, True
)
if server_args.enable_trace:
process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
if server_args.disaggregation_mode == "null":
thread_label = "Tokenizer"
trace_set_thread_info(thread_label)
def generate(
self,
# The input prompt. It can be a single prompt or a batch of prompts.