[Feature] Sglang Tracing: Fine-Grained Tracking for Request Latency - Part 1 (#9962)

Signed-off-by: Feng Su <sufeng@linux.alibaba.com>
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Peng Wang <rocking@linux.alibaba.com>
2025-09-15 02:08:02 +08:00
parent 165abeebca
commit 4c21b09074
12 changed files with 1129 additions and 0 deletions
--- a/python/sglang/srt/entrypoints/engine.py
+++ b/python/sglang/srt/entrypoints/engine.py
@@ -33,6 +33,8 @@ import zmq
 import zmq.asyncio
 from PIL.Image import Image

+from sglang.srt.tracing.trace import process_tracing_init, trace_set_thread_info
+
 # Fix a bug of Python threading
 setattr(threading, "_register_atexit", lambda *args, **kwargs: None)

@@ -138,6 +140,12 @@ class Engine(EngineBase):
            context, zmq.DEALER, self.port_args.rpc_ipc_name, True
        )

+        if server_args.enable_trace:
+            process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
+            if server_args.disaggregation_mode == "null":
+                thread_label = "Tokenizer"
+                trace_set_thread_info(thread_label)
+
    def generate(
        self,
        # The input prompt. It can be a single prompt or a batch of prompts.