From 57131dd955b35e63788ebf3be5dd40ef506e9abf Mon Sep 17 00:00:00 2001 From: Huapeng Zhou <73010314+PopSoda2002@users.noreply.github.com> Date: Mon, 21 Apr 2025 15:43:42 +0800 Subject: [PATCH] [Feat.] Enable grafana to show metrics (#4718) Co-authored-by: zhaochenyang20 --- docs/references/production_metrics.md | 4 +- examples/monitoring/README.md | 76 +++++++++++++++++++ examples/monitoring/docker-compose.yaml | 22 ++++-- .../grafana/dashboards/config/dashboard.yaml | 11 +++ .../dashboards/json/sglang-dashboard.json} | 2 +- .../grafana/datasources/datasource.yaml | 8 ++ 6 files changed, 115 insertions(+), 8 deletions(-) create mode 100644 examples/monitoring/README.md create mode 100644 examples/monitoring/grafana/dashboards/config/dashboard.yaml rename examples/monitoring/{grafana.json => grafana/dashboards/json/sglang-dashboard.json} (99%) create mode 100644 examples/monitoring/grafana/datasources/datasource.yaml diff --git a/docs/references/production_metrics.md b/docs/references/production_metrics.md index 7183158e1..e321f3957 100644 --- a/docs/references/production_metrics.md +++ b/docs/references/production_metrics.md @@ -2,7 +2,7 @@ SGLang exposes the following metrics via Prometheus. The metrics are namespaced by `$name` (the model name). -An example of the monitoring dashboard is available in [examples/monitoring/grafana.json](../examples/monitoring/grafana.json). +An example of the monitoring dashboard is available in [examples/monitoring/grafana/dashboards/json/sglang-dashboard.json](../examples/monitoring/grafana/dashboards/json/sglang-dashboard.json). Here is an example of the metrics: @@ -150,7 +150,7 @@ In a new Grafana setup, ensure that you have the `Prometheus` data source enable If not, click `Add data source` -> `Prometheus`, set Prometheus URL to `http://localhost:9090`, and click `Save & Test`. -To import the Grafana dashboard, click `+` -> `Import` -> `Upload JSON file` -> `Upload` and select [grafana.json](../examples/monitoring/grafana.json). 
+To import the Grafana dashboard, click `+` -> `Import` -> `Upload JSON file` -> `Upload` and select [sglang-dashboard.json](../examples/monitoring/grafana/dashboards/json/sglang-dashboard.json). ### Troubleshooting diff --git a/examples/monitoring/README.md b/examples/monitoring/README.md new file mode 100644 index 000000000..3eef0b09b --- /dev/null +++ b/examples/monitoring/README.md @@ -0,0 +1,76 @@ +# SGLang Monitoring Setup + +This directory contains a ready-to-use monitoring setup for SGLang using Prometheus and Grafana. + +## Prerequisites + +- Docker and Docker Compose installed +- SGLang server running with metrics enabled + +## Usage + +1. Start your SGLang server with metrics enabled: + +```bash +python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000 --enable-metrics +``` + +By default, the metrics server will run on `127.0.0.1:30000`. + +2. Start the monitoring stack: + +```bash +cd examples/monitoring +docker compose up +``` + +3. Access the monitoring interfaces: + - Grafana: [http://localhost:3000](http://localhost:3000) + - Prometheus: [http://localhost:9090](http://localhost:9090) + +Default Grafana login credentials: +- Username: `admin` +- Password: `admin` + +You'll be prompted to change the password on first login. + +4. The SGLang dashboard will be automatically available in the "SGLang Monitoring" folder. + +## Troubleshooting + +### Port Conflicts +If you see errors like "port is already allocated": + +1. Check if you already have Prometheus or Grafana running: + ```bash + docker ps | grep -E 'prometheus|grafana' + ``` + +2. Stop any conflicting containers: + ```bash + docker stop <container_id> + ``` + +3. Ensure no other services are using ports 9090 and 3000: + ```bash + lsof -i :9090 + lsof -i :3000 + ``` + +### Connection Issues +If Grafana cannot connect to Prometheus: +1. Check that both services are running +2. Verify the datasource configuration in Grafana +3. 
Check that your SGLang server is properly exposing metrics + +## Configuration + +- Prometheus configuration: `prometheus.yaml` +- Docker Compose configuration: `docker-compose.yaml` +- Grafana datasource: `grafana/datasources/datasource.yaml` +- Grafana dashboard configuration: `grafana/dashboards/config/dashboard.yaml` +- SGLang dashboard JSON: `grafana/dashboards/json/sglang-dashboard.json` + +## Customization + +You can customize the monitoring setup by modifying the configuration files as needed. diff --git a/examples/monitoring/docker-compose.yaml b/examples/monitoring/docker-compose.yaml index 6c18b4046..ce6457fa3 100644 --- a/examples/monitoring/docker-compose.yaml +++ b/examples/monitoring/docker-compose.yaml @@ -1,16 +1,28 @@ +version: '3' services: prometheus: image: prom/prometheus:latest + container_name: prometheus network_mode: host - ports: - - "9090:9090" volumes: - - ${PWD}/prometheus.yaml:/etc/prometheus/prometheus.yml + - ./prometheus.yaml:/etc/prometheus/prometheus.yml + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' grafana: image: grafana/grafana:latest + container_name: grafana network_mode: host + volumes: + - ./grafana/datasources:/etc/grafana/provisioning/datasources + - ./grafana/dashboards/config:/etc/grafana/provisioning/dashboards + - ./grafana/dashboards/json:/var/lib/grafana/dashboards + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer + - GF_AUTH_BASIC_ENABLED=false + - GF_USERS_ALLOW_SIGN_UP=false + - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/var/lib/grafana/dashboards/sglang-dashboard.json depends_on: - prometheus - ports: - - "3000:3000" diff --git a/examples/monitoring/grafana/dashboards/config/dashboard.yaml b/examples/monitoring/grafana/dashboards/config/dashboard.yaml new file mode 100644 index 000000000..6c17a6c63 --- /dev/null +++ b/examples/monitoring/grafana/dashboards/config/dashboard.yaml @@ -0,0 +1,11 @@ +apiVersion: 1 +providers: 
+ - name: 'SGLang' + orgId: 1 + folder: 'SGLang Monitoring' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: false + options: + path: /var/lib/grafana/dashboards diff --git a/examples/monitoring/grafana.json b/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json similarity index 99% rename from examples/monitoring/grafana.json rename to examples/monitoring/grafana/dashboards/json/sglang-dashboard.json index 6f2b640bc..d4a022d08 100644 --- a/examples/monitoring/grafana.json +++ b/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json @@ -388,7 +388,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))\r\n", + "expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))\r\n", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, diff --git a/examples/monitoring/grafana/datasources/datasource.yaml b/examples/monitoring/grafana/datasources/datasource.yaml new file mode 100644 index 000000000..1ab0e4a5f --- /dev/null +++ b/examples/monitoring/grafana/datasources/datasource.yaml @@ -0,0 +1,8 @@ +apiVersion: 1 +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://localhost:9090 + isDefault: true + editable: false