[Feature] Sglang Tracing: Fine-Grained Tracking for Request Latency - Part 1 (#9962)
Signed-off-by: Feng Su <sufeng@linux.alibaba.com>
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Peng Wang <rocking@linux.alibaba.com>
This commit is contained in:
@@ -33,6 +33,8 @@ import zmq
|
||||
import zmq.asyncio
|
||||
from PIL.Image import Image
|
||||
|
||||
from sglang.srt.tracing.trace import process_tracing_init, trace_set_thread_info
|
||||
|
||||
# Fix a bug of Python threading
|
||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||
|
||||
@@ -138,6 +140,12 @@ class Engine(EngineBase):
|
||||
context, zmq.DEALER, self.port_args.rpc_ipc_name, True
|
||||
)
|
||||
|
||||
if server_args.enable_trace:
|
||||
process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
|
||||
if server_args.disaggregation_mode == "null":
|
||||
thread_label = "Tokenizer"
|
||||
trace_set_thread_info(thread_label)
|
||||
|
||||
def generate(
|
||||
self,
|
||||
# The input prompt. It can be a single prompt or a batch of prompts.
|
||||
|
||||
@@ -31,6 +31,8 @@ from typing import Any, AsyncIterator, Callable, Dict, List, Optional
|
||||
|
||||
import setproctitle
|
||||
|
||||
from sglang.srt.tracing.trace import process_tracing_init, trace_set_thread_info
|
||||
|
||||
# Fix a bug of Python threading
|
||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||
|
||||
@@ -179,6 +181,13 @@ async def init_multi_tokenizer() -> ServerArgs:
|
||||
scheduler_info=scheduler_info,
|
||||
)
|
||||
)
|
||||
|
||||
if server_args.enable_trace:
|
||||
process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
|
||||
if server_args.disaggregation_mode == "null":
|
||||
thread_label = f"MultiTokenizer-{tokenizer_manager.worker_id}"
|
||||
trace_set_thread_info(thread_label)
|
||||
|
||||
return server_args
|
||||
|
||||
|
||||
@@ -1203,6 +1212,12 @@ def launch_server(
|
||||
server_args=server_args,
|
||||
)
|
||||
|
||||
if server_args.enable_trace:
|
||||
process_tracing_init(server_args.oltp_traces_endpoint, "sglang")
|
||||
if server_args.disaggregation_mode == "null":
|
||||
thread_label = "Tokenizer"
|
||||
trace_set_thread_info(thread_label)
|
||||
|
||||
set_global_state(
|
||||
_GlobalState(
|
||||
tokenizer_manager=tokenizer_manager,
|
||||
|
||||
Reference in New Issue
Block a user