add qwen3
This commit is contained in:
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
#
# Client-side serving benchmark sweep.
#
# For every (input length, output length, max concurrency) combination listed
# below, runs benchmarks/benchmark_serving.py with --port ${PORT} and tees its
# combined stdout/stderr into
#   output/client/<model>_<input>_<output>_bs_<concurrency>.log
#
# All paths are relative, so run this from the directory that contains
# benchmarks/. Any previous output/client directory is deleted on start.
#
# Optional environment:
#   HF_MODEL  Path or name whose basename prefixes the log file names.
#             Defaults to MODEL_PATH when unset or empty.

# export EXPERT_PARALLEL_EN=True
# export VLLM_LATENCY_DEBUG=True

# Fail on typos in variable names. Deliberately no `set -e`: a failing run
# must not abort the remaining combinations of the sweep.
set -u

OUTPUT_DIR=output/client

# Start from a clean results directory so stale logs are never mixed in.
rm -rf -- "${OUTPUT_DIR}"
mkdir -p -- "${OUTPUT_DIR}"

PORT=32345
MODEL_PATH="/data/vllm/sq_per_token_per_channel/deepseek_v2_temp"

input_sizes=(1024)
output_sizes=(1)
# batch_sizes=(1 2 4 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40)
batch_sizes=(32)

# Loop-invariant, so computed once. Falls back to MODEL_PATH because HF_MODEL
# is not assigned anywhere in this script; without the fallback the log names
# would start with a bare underscore.
hf_model_name=$(basename -- "${HF_MODEL:-${MODEL_PATH}}")

for input_size in "${input_sizes[@]}"; do
  for output_size in "${output_sizes[@]}"; do
    for batch_size in "${batch_sizes[@]}"; do
      LOG_FILE="${OUTPUT_DIR}/${hf_model_name}_${input_size}_${output_size}_bs_${batch_size}.log"

      # --random-input-len is spelled with dashes, matching the other flags.
      python benchmarks/benchmark_serving.py \
        --backend vllm \
        --model "${MODEL_PATH}" \
        --trust-remote-code \
        --dataset-name random \
        --num-prompts 1000 \
        --port "${PORT}" \
        --request-rate inf \
        --random-input-len "${input_size}" \
        --random-output-len "${output_size}" \
        --max-concurrency "${batch_size}" \
        2>&1 | tee -- "${LOG_FILE}"
    done
  done
done
|
||||
Reference in New Issue
Block a user