Use an env var SGLANG_SET_CPU_AFFINITY to set cpu affinity; turn it off by default (#2222)
This commit is contained in:
@@ -5,9 +5,9 @@ This script launches a server and uses the HTTP interface.
|
||||
It accepts server arguments (the same as launch_server.py) and benchmark arguments (e.g., batch size, input lengths).
|
||||
|
||||
Usage:
|
||||
python3 -m sglang.bench_server_latency --model meta-llama/Meta-Llama-3.1-8B --batch-size 1 16 64 --input-len 1024 --output-len 8
|
||||
python3 -m sglang.bench_one_batch_server --model meta-llama/Meta-Llama-3.1-8B --batch-size 1 16 64 --input-len 1024 --output-len 8
|
||||
|
||||
python3 -m sglang.bench_server_latency --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8
|
||||
python3 -m sglang.bench_one_batch_server --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
Reference in New Issue
Block a user