#!/usr/bin/env bash
#
# Fetch the GPTQ-for-LLaMa repository, install its Python requirements, and
# run its neox.py script against the model stored in ./gptneox_model, writing
# the result to ./gptneox_model/gptneox-20b-4bit-gs128.safetensors.
#
# Usage: run from the directory that contains ./gptneox_model.
#
# NOTE(review): presumably this quantizes a GPT-NeoX-20B checkpoint to 4-bit
# GPTQ weights using wikitext2 as the calibration dataset; confirm against
# neox.py's argument parser.

# Abort on the first failing command, on unset variables, and on pipeline
# failures, so a failed clone or install never leads into the GPU run.
set -euo pipefail

readonly REPO_URL='https://github.com/qwopqwop200/GPTQ-for-LLaMa.git'
readonly REPO_DIR='GPTQ-for-LLaMa'
readonly MODEL_DIR='./gptneox_model'
readonly OUTPUT_FILE="${MODEL_DIR}/gptneox-20b-4bit-gs128.safetensors"

# Fail fast: the model directory is both the input and the output location,
# so there is no point cloning or installing anything if it is missing.
if [[ ! -d "${MODEL_DIR}" ]]; then
  printf 'error: model directory not found: %s\n' "${MODEL_DIR}" >&2
  exit 1
fi

# Reuse an existing checkout so the script can be re-run; `git clone` refuses
# to clone into an existing non-empty directory.
if [[ ! -d "${REPO_DIR}/.git" ]]; then
  git clone "${REPO_URL}" "${REPO_DIR}"
fi

# Invoke pip through python3 so the requirements are installed for the same
# interpreter that runs neox.py below.
python3 -m pip install -r "./${REPO_DIR}/requirements.txt"

# CUDA_VISIBLE_DEVICES=0 exposes only the first GPU to the process.
CUDA_VISIBLE_DEVICES=0 python3 "${REPO_DIR}/neox.py" "${MODEL_DIR}" \
  wikitext2 \
  --wbits 4 \
  --groupsize 128 \
  --save_safetensors "${OUTPUT_FILE}"