#!/bin/bash
#
# Run the fixed input/output-length throughput benchmark.
#
# Usage: <this-script> MODEL_PATH ENGINE_PATH
#   MODEL_PATH   passed to benchmark_throughput.py as both --model and
#                --tokenizer
#   ENGINE_PATH  passed to benchmark_throughput.py as --engine_dir
#
# benchmark_throughput.py is resolved relative to the current working
# directory. Its stdout is written to ./server.log, which is overwritten on
# every run; stderr is left attached to the caller's stderr.

set -euo pipefail

# Fail early with a clear message instead of handing empty values to the
# benchmark's argument parser.
if (( $# < 2 )); then
  printf 'Usage: %s MODEL_PATH ENGINE_PATH\n' "${0##*/}" >&2
  exit 2
fi

model_path=$1
engine_path=$2

# Build the argument list as an array so that paths containing whitespace or
# glob characters reach the benchmark as single, unmodified arguments.
benchmark_args=(
  --backend vllm
  --model "$model_path"
  --tokenizer "$model_path"
  --engine_dir "$engine_path"
  --tensor-parallel-size 8
  --dummy-dataset
  --max-num-seqs 128
  --max-num-batched-tokens 2048
  --dummy-tokenid 1
  --dummy-input-len 1024
  --dummy-output-len 1024
  --max-model-len 2048
  --num-prompts 128
)

# XMLIR_D_XPU_L3_SIZE=0 is set only in the environment of this one command.
# NOTE(review): presumably this is a device/runtime cache-size knob; its exact
# meaning is not visible from this script -- confirm against the runtime docs.
XMLIR_D_XPU_L3_SIZE=0 python benchmark_throughput.py "${benchmark_args[@]}" \
  > server.log