support prometheus metrics (#1853)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
Lzhang-hub
2024-11-06 12:42:53 +08:00
committed by GitHub
parent f5113e50ae
commit a146d9990e
7 changed files with 526 additions and 3 deletions

View File

@@ -25,12 +25,15 @@ import json
import logging
import multiprocessing as mp
import os
import re
import tempfile
import threading
import time
from http import HTTPStatus
from typing import AsyncIterator, Dict, List, Optional, Union
import orjson
from starlette.routing import Mount
# Fix a bug of Python threading
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
@@ -86,6 +89,10 @@ from sglang.utils import get_exception_traceback
logger = logging.getLogger(__name__)
# Temporary directory for prometheus multiprocess mode.
# Only declared here (annotation without a value); it is assigned by
# _set_prometheus_env(), which launch_server() calls when
# server_args.enable_metrics is set. Holding the reference in a module-level
# name keeps the TemporaryDirectory object from being collected early.
# Cleaned up automatically when this object is garbage collected
prometheus_multiproc_dir: tempfile.TemporaryDirectory
# Use uvloop's event loop policy for asyncio in this process.
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
@@ -412,6 +419,18 @@ def launch_engine(
for i in range(len(scheduler_pipe_readers)):
scheduler_pipe_readers[i].recv()
def add_prometheus_middleware(app: FastAPI):
    """Serve prometheus metrics from ``app`` under ``/metrics``.

    A fresh ``CollectorRegistry`` is wired to a ``MultiProcessCollector`` and
    exposed through the prometheus ASGI app, which is appended to
    ``app.routes`` as a mounted sub-application.

    Adapted from
    https://github.com/vllm-project/vllm/blob/v0.6.1/vllm/entrypoints/openai/api_server.py#L216

    Args:
        app: The FastAPI application that receives the extra route.
    """
    # Imported inside the function: PROMETHEUS_MULTIPROC_DIR has to be set
    # before prometheus_client is imported (see _set_prometheus_env).
    from prometheus_client import CollectorRegistry
    from prometheus_client import make_asgi_app
    from prometheus_client import multiprocess

    collector_registry = CollectorRegistry()
    multiprocess.MultiProcessCollector(collector_registry)
    metrics_app = make_asgi_app(registry=collector_registry)

    mount = Mount("/metrics", metrics_app)
    # Workaround for 307 Redirect for /metrics
    mount.path_regex = re.compile("^/metrics(?P<path>.*)$")
    app.routes.append(mount)
def launch_server(
server_args: ServerArgs,
@@ -439,6 +458,11 @@ def launch_server(
if server_args.api_key:
add_api_key_middleware(app, server_args.api_key)
# add prometheus middleware
if server_args.enable_metrics:
_set_prometheus_env()
add_prometheus_middleware(app)
# Send a warmup request
t = threading.Thread(
target=_wait_and_warmup, args=(server_args, pipe_finish_writer)
@@ -466,6 +490,21 @@ def launch_server(
finally:
t.join()
def _set_prometheus_env():
    """Point ``PROMETHEUS_MULTIPROC_DIR`` at a fresh temporary directory.

    sglang uses prometheus multiprocess mode, which requires this environment
    variable to be set before ``prometheus_client`` is imported:
    https://prometheus.github.io/client_python/multiprocess/

    If the user already exported a non-empty ``PROMETHEUS_MULTIPROC_DIR``, the
    temporary directory is created inside it (the parent is created first when
    it does not exist yet); otherwise it is created in the system default
    temporary location. In both cases the environment variable is rewritten to
    the temporary directory's path, and the ``TemporaryDirectory`` object is
    stored in the module-level ``prometheus_multiproc_dir`` so it stays alive.
    """
    global prometheus_multiproc_dir

    # An empty value is treated like an unset variable instead of being passed
    # on as a directory name.
    user_dir = os.environ.get("PROMETHEUS_MULTIPROC_DIR")
    if user_dir:
        logger.debug("User set PROMETHEUS_MULTIPROC_DIR detected.")
        # TemporaryDirectory(dir=...) raises FileNotFoundError when the parent
        # directory is missing, so make sure it exists first.
        os.makedirs(user_dir, exist_ok=True)
        prometheus_multiproc_dir = tempfile.TemporaryDirectory(dir=user_dir)
    else:
        prometheus_multiproc_dir = tempfile.TemporaryDirectory()

    os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_multiproc_dir.name
    logger.debug(
        "PROMETHEUS_MULTIPROC_DIR: %s", os.environ["PROMETHEUS_MULTIPROC_DIR"]
    )
def _set_envs_and_config(server_args: ServerArgs):
# Set global environments