Sgl-router: add Prometheus metrics endpoint and usage-tracking metrics (#6537)
This commit is contained in:
@@ -48,6 +48,9 @@ class RouterArgs:
|
||||
selector: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||
service_discovery_port: int = 80
|
||||
service_discovery_namespace: Optional[str] = None
|
||||
# Prometheus configuration
|
||||
prometheus_port: Optional[int] = None
|
||||
prometheus_host: Optional[str] = None
|
||||
|
||||
@staticmethod
|
||||
def add_cli_args(
|
||||
@@ -176,6 +179,19 @@ class RouterArgs:
|
||||
type=str,
|
||||
help="Kubernetes namespace to watch for pods. If not provided, watches all namespaces (requires cluster-wide permissions)",
|
||||
)
|
||||
# Prometheus configuration
|
||||
parser.add_argument(
|
||||
f"--{prefix}prometheus-port",
|
||||
type=int,
|
||||
default=29000,
|
||||
help="Port to expose Prometheus metrics. Defaults to 29000 when not specified",
|
||||
)
|
||||
parser.add_argument(
|
||||
f"--{prefix}prometheus-host",
|
||||
type=str,
|
||||
default="127.0.0.1",
|
||||
help="Host address to bind the Prometheus metrics server",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_cli_args(
|
||||
@@ -215,6 +231,8 @@ class RouterArgs:
|
||||
service_discovery_namespace=getattr(
|
||||
args, f"{prefix}service_discovery_namespace", None
|
||||
),
|
||||
prometheus_port=getattr(args, f"{prefix}prometheus_port", None),
|
||||
prometheus_host=getattr(args, f"{prefix}prometheus_host", None),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@@ -278,6 +296,8 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
|
||||
selector=router_args.selector,
|
||||
service_discovery_port=router_args.service_discovery_port,
|
||||
service_discovery_namespace=router_args.service_discovery_namespace,
|
||||
prometheus_port=router_args.prometheus_port,
|
||||
prometheus_host=router_args.prometheus_host,
|
||||
)
|
||||
|
||||
router.start()
|
||||
|
||||
@@ -40,6 +40,8 @@ class Router:
|
||||
worker URLs using this port. Default: 80
|
||||
service_discovery_namespace: Kubernetes namespace to watch for pods. If not provided,
|
||||
watches pods across all namespaces (requires cluster-wide permissions). Default: None
|
||||
prometheus_port: Port to expose Prometheus metrics. Default: None
|
||||
prometheus_host: Host address to bind the Prometheus metrics server. Default: None
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -62,6 +64,8 @@ class Router:
|
||||
selector: Dict[str, str] = None,
|
||||
service_discovery_port: int = 80,
|
||||
service_discovery_namespace: Optional[str] = None,
|
||||
prometheus_port: Optional[int] = None,
|
||||
prometheus_host: Optional[str] = None,
|
||||
):
|
||||
if selector is None:
|
||||
selector = {}
|
||||
@@ -85,6 +89,8 @@ class Router:
|
||||
selector=selector,
|
||||
service_discovery_port=service_discovery_port,
|
||||
service_discovery_namespace=service_discovery_namespace,
|
||||
prometheus_port=prometheus_port,
|
||||
prometheus_host=prometheus_host,
|
||||
)
|
||||
|
||||
def start(self) -> None:
|
||||
|
||||
Reference in New Issue
Block a user