7 lines
426 B
Bash
7 lines
426 B
Bash
|
|
#!/bin/bash
#
# Runs benchmark_throughput.py with the vllm backend against a dummy dataset
# and writes the benchmark's standard output to server.log in the current
# directory (standard error is left attached to the caller's terminal).
#
# Usage: <this script> MODEL_PATH ENGINE_PATH
#   MODEL_PATH   passed to the benchmark as both --model and --tokenizer
#   ENGINE_PATH  passed to the benchmark as --engine_dir

# Abort with a usage message when an argument is missing or empty, instead of
# letting the benchmark's option parser see flags without their values.
model_path=${1:?usage: $0 MODEL_PATH ENGINE_PATH}
engine_path=${2:?usage: $0 MODEL_PATH ENGINE_PATH}

# Fixed input/output benchmark: 128 dummy prompts with input length 1024 and
# output length 1024 (together equal to --max-model-len 2048).
# XMLIR_D_XPU_L3_SIZE=0 is set in the environment of this one command only.
# NOTE(review): its exact effect is not visible from this script — confirm
# against the XPU runtime documentation.
# The path variables are quoted so that paths containing spaces or glob
# characters reach the benchmark as single arguments.
XMLIR_D_XPU_L3_SIZE=0 python benchmark_throughput.py \
  --backend vllm \
  --model "$model_path" \
  --tokenizer "$model_path" \
  --engine_dir "$engine_path" \
  --tensor-parallel-size 8 \
  --dummy-dataset \
  --max-num-seqs 128 \
  --max-num-batched-tokens 2048 \
  --dummy-tokenid 1 \
  --dummy-input-len 1024 \
  --dummy-output-len 1024 \
  --max-model-len 2048 \
  --num-prompts 128 > server.log
|