Use an env var SGLANG_SET_CPU_AFFINITY to set cpu affinity; turn it off by default (#2222)

This commit is contained in:
Lianmin Zheng
2024-11-27 02:52:46 -08:00
committed by GitHub
parent 6997e28f6e
commit fb6e04a0c2
6 changed files with 23 additions and 20 deletions

View File

@@ -5,9 +5,9 @@ This script launches a server and uses the HTTP interface.
It accepts server arguments (the same as launch_server.py) and benchmark arguments (e.g., batch size, input lengths).
Usage:
-python3 -m sglang.bench_server_latency --model meta-llama/Meta-Llama-3.1-8B --batch-size 1 16 64 --input-len 1024 --output-len 8
+python3 -m sglang.bench_one_batch_server --model meta-llama/Meta-Llama-3.1-8B --batch-size 1 16 64 --input-len 1024 --output-len 8
-python3 -m sglang.bench_server_latency --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8
+python3 -m sglang.bench_one_batch_server --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8
"""
import argparse