fix: fix the missing metrics on non-rank0 nodes (#7720)
This commit is contained in:
@@ -765,7 +765,9 @@ def _launch_subprocesses(
|
|||||||
# When using `Engine` as a Python API, we don't want to block here.
|
# When using `Engine` as a Python API, we don't want to block here.
|
||||||
return None, None, None
|
return None, None, None
|
||||||
|
|
||||||
launch_dummy_health_check_server(server_args.host, server_args.port)
|
launch_dummy_health_check_server(
|
||||||
|
server_args.host, server_args.port, server_args.enable_metrics
|
||||||
|
)
|
||||||
|
|
||||||
for proc in scheduler_procs:
|
for proc in scheduler_procs:
|
||||||
proc.join()
|
proc.join()
|
||||||
|
|||||||
@@ -85,6 +85,8 @@ from torch.profiler import ProfilerActivity, profile, record_function
|
|||||||
from torch.utils._contextlib import _DecoratorContextManager
|
from torch.utils._contextlib import _DecoratorContextManager
|
||||||
from triton.runtime.cache import FileCacheManager
|
from triton.runtime.cache import FileCacheManager
|
||||||
|
|
||||||
|
from sglang.srt.metrics.func_timer import enable_func_timer
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
show_time_cost = False
|
show_time_cost = False
|
||||||
@@ -2049,7 +2051,7 @@ def rank0_log(msg: str):
|
|||||||
logger.info(msg)
|
logger.info(msg)
|
||||||
|
|
||||||
|
|
||||||
def launch_dummy_health_check_server(host, port):
|
def launch_dummy_health_check_server(host, port, enable_metrics):
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
import uvicorn
|
import uvicorn
|
||||||
@@ -2067,6 +2069,11 @@ def launch_dummy_health_check_server(host, port):
|
|||||||
"""Check the health of the http server."""
|
"""Check the health of the http server."""
|
||||||
return Response(status_code=200)
|
return Response(status_code=200)
|
||||||
|
|
||||||
|
# Add Prometheus middleware and enable the function timer when metrics are enabled
|
||||||
|
if enable_metrics:
|
||||||
|
add_prometheus_middleware(app)
|
||||||
|
enable_func_timer()
|
||||||
|
|
||||||
config = uvicorn.Config(
|
config = uvicorn.Config(
|
||||||
app,
|
app,
|
||||||
host=host,
|
host=host,
|
||||||
|
|||||||
Reference in New Issue
Block a user