[Feature] Sglang Tracing: Fine-Grained Tracking for Request Latency - Part 1 (#9962)
Signed-off-by: Feng Su <sufeng@linux.alibaba.com>
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Peng Wang <rocking@linux.alibaba.com>
This commit is contained in:
@@ -33,6 +33,8 @@ import zmq
|
||||
import zmq.asyncio
|
||||
from PIL.Image import Image
|
||||
|
||||
from sglang.srt.tracing.trace import process_tracing_init, trace_set_thread_info
|
||||
|
||||
# Work around a bug in Python threading by replacing threading._register_atexit with a no-op
|
||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||
|
||||
@@ -138,6 +140,12 @@ class Engine(EngineBase):
|
||||
context, zmq.DEALER, self.port_args.rpc_ipc_name, True
|
||||
)
|
||||
|
||||
if server_args.enable_trace:
|
||||
process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
|
||||
if server_args.disaggregation_mode == "null":
|
||||
thread_label = "Tokenizer"
|
||||
trace_set_thread_info(thread_label)
|
||||
|
||||
def generate(
|
||||
self,
|
||||
# The input prompt. It can be a single prompt or a batch of prompts.
|
||||
|
||||
Reference in New Issue
Block a user