#!/usr/bin/env bash
# Recovered from a UI paste; viewer metadata ("21 lines, 622 B, Bash") removed.
# Build the performance-test scale string: entries of the form
# "<batch>x<len>x<len>", joined by the separator 'E'.
# First sweep batches 1..8 over sequence lengths 64..1024 in steps of 32,
# then append a descending batch sweep (8..1) at 2000x64, and finish with
# a final un-terminated "1x2000x64" entry (no trailing 'E').
# NOTE(review): SCALE is assembled here but the run.py invocation below
# passes a hard-coded --performance_test_scale value instead — confirm
# which list is actually intended.
SCALE=""
for (( batch = 1; batch <= 8; batch++ )); do
  for (( seq_len = 64; seq_len <= 1024; seq_len += 32 )); do
    SCALE+="${batch}x${seq_len}x${seq_len}E"
  done
done
for (( batch = 8; batch >= 1; batch-- )); do
  SCALE+="${batch}x2000x64E"
done
SCALE+="1x2000x64"
# Run the benchmark. The env-var prefix applies only to this one command:
# PYTORCH_NO_XPU_MEMORY_CACHING=1 and XMLIR_D_XPU_L3_SIZE=0 presumably
# disable the XPU caching allocator and L3 cache (names suggest this —
# confirm against the runtime's docs).
# NOTE(review): the scale list below is hard-coded; the $SCALE string built
# above is never referenced — confirm which one is intended.
run_args=(
  --engine_dir=/root/.cache/llama_outputs/
  --max_output_len 256
  --performance_test_scale 1x2000x64E2x2000x64E4x2000x64E8x2000x64E11x2000x64E1x2000x64E2x2000x64E4x2000x64E8x2000x64E11x2000x64
  --tokenizer_dir=/root/.cache/huggingface/hub/models--huggyllama--llama-7b/snapshots/8416d3fefb0cb3ff5775a7b13c1692d10ff1aa16/
  --log_level=info
)
PYTORCH_NO_XPU_MEMORY_CACHING=1 XMLIR_D_XPU_L3_SIZE=0 \
  python3 run.py "${run_args[@]}"
#_remove_padding