[Feat.] Enable grafana to show metrics (#4718)
Co-authored-by: zhaochenyang20 <zhaochen20@outlook.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
SGLang exposes the following metrics via Prometheus. The metrics are namespaced by `$name` (the model name).
|
||||
|
||||
An example of the monitoring dashboard is available in [examples/monitoring/grafana.json](../examples/monitoring/grafana.json).
|
||||
An example of the monitoring dashboard is available in [examples/monitoring/grafana/dashboards/json/sglang-dashboard.json](../examples/monitoring/grafana/dashboards/json/sglang-dashboard.json).
|
||||
|
||||
Here is an example of the metrics:
|
||||
|
||||
@@ -150,7 +150,7 @@ In a new Grafana setup, ensure that you have the `Prometheus` data source enable
|
||||
|
||||
If not, click `Add data source` -> `Prometheus`, set Prometheus URL to `http://localhost:9090`, and click `Save & Test`.
|
||||
|
||||
To import the Grafana dashboard, click `+` -> `Import` -> `Upload JSON file` -> `Upload` and select [grafana.json](../examples/monitoring/grafana.json).
|
||||
To import the Grafana dashboard, click `+` -> `Import` -> `Upload JSON file` -> `Upload` and select [sglang-dashboard.json](../examples/monitoring/grafana/dashboards/json/sglang-dashboard.json).
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
|
||||
76
examples/monitoring/README.md
Normal file
76
examples/monitoring/README.md
Normal file
@@ -0,0 +1,76 @@
|
||||
# SGLang Monitoring Setup
|
||||
|
||||
This directory contains a ready-to-use monitoring setup for SGLang using Prometheus and Grafana.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Docker and Docker Compose installed
|
||||
- SGLang server running with metrics enabled
|
||||
|
||||
## Usage
|
||||
|
||||
1. Start your SGLang server with metrics enabled:
|
||||
|
||||
```bash
|
||||
python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000 --enable-metrics
|
||||
```
|
||||
|
||||
By default, the metrics server will run on `127.0.0.1:30000`.
|
||||
|
||||
2. Start the monitoring stack:
|
||||
|
||||
```bash
|
||||
cd examples/monitoring
|
||||
docker compose up
|
||||
```
|
||||
|
||||
3. Access the monitoring interfaces:
|
||||
- Grafana: [http://localhost:3000](http://localhost:3000)
|
||||
- Prometheus: [http://localhost:9090](http://localhost:9090)
|
||||
|
||||
Default Grafana login credentials:
|
||||
- Username: `admin`
|
||||
- Password: `admin`
|
||||
|
||||
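You'll be prompted to change the password on first login. Note that `docker-compose.yaml` enables anonymous access with the `Viewer` role, so the dashboards can be viewed without signing in.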
|
||||
|
||||
4. The SGLang dashboard will be automatically available in the "SGLang Monitoring" folder.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Port Conflicts
|
||||
If you see errors like "port is already allocated":
|
||||
|
||||
1. Check if you already have Prometheus or Grafana running:
|
||||
```bash
|
||||
docker ps | grep -E 'prometheus|grafana'
|
||||
```
|
||||
|
||||
2. Stop any conflicting containers:
|
||||
```bash
|
||||
docker stop <container_id>
|
||||
```
|
||||
|
||||
3. Ensure no other services are using ports 9090 and 3000:
|
||||
```bash
|
||||
lsof -i :9090
|
||||
lsof -i :3000
|
||||
```
|
||||
|
||||
### Connection Issues
|
||||
If Grafana cannot connect to Prometheus:
|
||||
1. Check that both services are running
|
||||
2. Verify the datasource configuration in Grafana
|
||||
3. Check that your SGLang server is properly exposing metrics
|
||||
|
||||
## Configuration
|
||||
|
||||
- Prometheus configuration: `prometheus.yaml`
|
||||
- Docker Compose configuration: `docker-compose.yaml`
|
||||
- Grafana datasource: `grafana/datasources/datasource.yaml`
|
||||
- Grafana dashboard configuration: `grafana/dashboards/config/dashboard.yaml`
|
||||
- SGLang dashboard JSON: `grafana/dashboards/json/sglang-dashboard.json`
|
||||
|
||||
## Customization
|
||||
|
||||
You can customize the monitoring setup by modifying the configuration files as needed.
|
||||
@@ -1,16 +1,28 @@
|
||||
version: '3'
|
||||
services:
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: prometheus
|
||||
network_mode: host
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ${PWD}/prometheus.yaml:/etc/prometheus/prometheus.yml
|
||||
- ./prometheus.yaml:/etc/prometheus/prometheus.yml
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
- '--storage.tsdb.path=/prometheus'
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
container_name: grafana
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ./grafana/datasources:/etc/grafana/provisioning/datasources
|
||||
- ./grafana/dashboards/config:/etc/grafana/provisioning/dashboards
|
||||
- ./grafana/dashboards/json:/var/lib/grafana/dashboards
|
||||
environment:
|
||||
- GF_AUTH_ANONYMOUS_ENABLED=true
|
||||
- GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
|
||||
- GF_AUTH_BASIC_ENABLED=false
|
||||
- GF_USERS_ALLOW_SIGN_UP=false
|
||||
- GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/var/lib/grafana/dashboards/sglang-dashboard.json
|
||||
depends_on:
|
||||
- prometheus
|
||||
ports:
|
||||
- "3000:3000"
|
||||
|
||||
11
examples/monitoring/grafana/dashboards/config/dashboard.yaml
Normal file
11
examples/monitoring/grafana/dashboards/config/dashboard.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
apiVersion: 1
|
||||
providers:
|
||||
- name: 'SGLang'
|
||||
orgId: 1
|
||||
folder: 'SGLang Monitoring'
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: false
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
@@ -388,7 +388,7 @@
|
||||
},
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))\r\n",
|
||||
"expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))\r\n",
|
||||
"fullMetaSearch": false,
|
||||
"hide": false,
|
||||
"includeNullMetadata": true,
|
||||
8
examples/monitoring/grafana/datasources/datasource.yaml
Normal file
8
examples/monitoring/grafana/datasources/datasource.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://localhost:9090
|
||||
isDefault: true
|
||||
editable: false
|
||||
Reference in New Issue
Block a user