#!/usr/bin/env bash
# Launch the llama-7b performance benchmark (run.py) with XPU memory caching
# and the XPU L3 cache disabled, sweeping the hard-coded batch/seq-len scales.
set -euo pipefail

# Build a scale spec of the form "BxLxL" joined by 'E':
#   - batches 1..8 crossed with square lengths 64..1024 (step 32),
#   - then descending batches 8..1 at 2000x64,
#   - terminated by a final "1x2000x64" with no trailing 'E'.
# NOTE(review): SCALE is constructed but never consumed below — the
# --performance_test_scale argument is hard-coded to a different string.
# Presumably one of the two is stale; confirm whether the flag should
# instead read "$SCALE".
SCALE=""
for _b in {1..8}; do
  for _len in {64..1024..32}; do
    SCALE+="${_b}x${_len}x${_len}E"
  done
done
for i in {8..1}; do
  SCALE+="${i}x2000x64E"
done
SCALE+="1x2000x64"

# PYTORCH_NO_XPU_MEMORY_CACHING=1 disables the XPU caching allocator and
# XMLIR_D_XPU_L3_SIZE=0 disables the L3 cache; both are scoped to this
# single command, not exported to the environment at large.
PYTORCH_NO_XPU_MEMORY_CACHING=1 XMLIR_D_XPU_L3_SIZE=0 \
  python3 run.py \
    --engine_dir=/root/.cache/llama_outputs/ \
    --max_output_len 256 \
    --performance_test_scale 1x2000x64E2x2000x64E4x2000x64E8x2000x64E11x2000x64E1x2000x64E2x2000x64E4x2000x64E8x2000x64E11x2000x64 \
    --tokenizer_dir=/root/.cache/huggingface/hub/models--huggyllama--llama-7b/snapshots/8416d3fefb0cb3ff5775a7b13c1692d10ff1aa16/ \
    --log_level=info #_remove_padding