Files
Qwen2.5-0.5B-quantized.w8a8/quantize_qwen2.5_fp8.sh
ModelHub XC 0c98de33a8 初始化项目,由ModelHub XC社区提供模型
Model: RedHatAI/Qwen2.5-0.5B-quantized.w8a8
Source: Original Platform
2026-04-28 14:42:56 +08:00

39 lines
832 B
Bash

# Quantize the Qwen2.5 model family to FP8 with llm-compressor: launch one
# calibration one-shot pipeline run per (size, variant) combination.
# Requires: ClearML virtualenv at ~/environments/clearml, pipeline script in /cache/git.
set -euo pipefail

source ~/environments/clearml/bin/activate

# FP8 recipe: quantize all Linear layers except lm_head, select scales with an
# MSE observer. Quoted heredoc delimiter ('EOF') keeps the YAML literal.
# Fix: this used to be stored as $recipe_template but passed as $recipe (unset),
# so an empty recipe reached the pipeline; one name is now used throughout.
recipe=$(cat <<'EOF'
quant_stage:
  quant_modifiers:
    QuantizationModifier:
      ignore: ["lm_head"]
      scheme: FP8
      targets: ["Linear"]
      observer: "mse"
EOF
)

for size in 0.5B 1.5B 3B 7B 32B 72B; do
  for version in base instruct; do
    if [[ "$version" == "base" ]]; then
      model="Qwen2.5-${size}"
    else
      model="Qwen2.5-${size}-Instruct"
    fi
    # Task prefix encodes model (dots -> underscores), observer, num samples,
    # max seq len, and dampening.
    # NOTE(review): 8196 looks like a typo for 8192, but it is baked into
    # existing task names — confirm before changing it here and in --max-seq-len.
    prefix="${model//./_}__llm_compressor__calibration__mse__512__8196__damp01"
    # Tag fixed from "W4A16" to "FP8" to match the recipe scheme and project name.
    python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
      --model-id "Qwen/${model}" \
      --project-name "LLM quantization - FP8/llmcompressor/Qwen2.5" \
      --task-prefix "$prefix" \
      --recipe "$recipe" \
      --num-samples 512 \
      --max-seq-len 8196 \
      --tags "Qwen2.5" "FP8" "calibration" "$size" "MSE" "$version"
  done
done