Files
r200_8f_xtrt_llm/examples/vllm_test/run_throughput.sh

7 lines
426 B
Bash
Raw Normal View History

2025-08-06 15:49:14 +08:00
#!/bin/bash
#
# Run the fixed input/output throughput benchmark against the vLLM backend.
#
# Usage: run_throughput.sh <model_path> <engine_path>
#   model_path   passed to both --model and --tokenizer
#   engine_path  passed to --engine_dir
#
# benchmark_throughput.py is looked up relative to the current working
# directory. Its stdout is written to ./server.log (overwritten on every
# run); stderr stays attached to the caller.
# Exit status: 2 on a usage error, otherwise the status of the benchmark.

#######################################
# Validate the arguments and launch the benchmark.
# Arguments: $1 - model path, $2 - engine path (extra arguments are ignored)
# Outputs:   benchmark stdout to ./server.log; usage message to stderr
# Returns:   2 if an argument is missing or empty, else python's status
#######################################
main() {
  # Fail early: an empty path would otherwise vanish from the command line
  # and make the following flag get swallowed as the option's value.
  if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
    printf 'Usage: %s <model_path> <engine_path>\n' "${0##*/}" >&2
    return 2
  fi

  local model_path=$1
  local engine_path=$2

  # Build the argument list as an array so that paths containing spaces or
  # glob characters reach the benchmark as single, unmodified arguments.
  local -a bench_args=(
    --backend vllm
    --model "$model_path"
    --tokenizer "$model_path"
    --engine_dir "$engine_path"
    --tensor-parallel-size 8
    --dummy-dataset
    --max-num-seqs 128
    --max-num-batched-tokens 2048
    --dummy-tokenid 1
    --dummy-input-len 1024
    --dummy-output-len 1024
    --max-model-len 2048
    --num-prompts 128
  )

  # XMLIR_D_XPU_L3_SIZE=0 is set for this single invocation only.
  # NOTE(review): its exact effect on the XPU runtime is not visible here.
  XMLIR_D_XPU_L3_SIZE=0 python benchmark_throughput.py "${bench_args[@]}" > server.log
}

main "$@"