#!/bin/bash
#
# Produce quantized GGUF variants of a bf16 GGUF model using llama.cpp's
# llama-quantize tool.
#
# All paths in this script are relative, so it must be run from the directory
# that holds the model files and the llama.cpp checkout.

# Input model file (corrected filename)
readonly INPUT_MODEL="osmosis-apply-1.7b-bf16.gguf"

# Quantization formats to generate, processed in the order listed
QUANT_FORMATS=(
  "Q4_K_S"
  "Q5_K_M"
  "Q5_K_S"
  "Q6_K"
  "IQ4_XS"
  "Q8_0"
  "Q2_K"
  "Q3_K_L"
  "Q3_K_M"
  "Q3_K_S"
  "Q4_K_M"
)
readonly QUANT_FORMATS
# Generate bf16 model if it doesn't exist
if [[ ! -f "$INPUT_MODEL" ]]; then
  echo "bf16 model not found. Generating $INPUT_MODEL..."

  # Run the conversion inside a subshell so this script's working directory
  # is never changed: with a plain `cd llama.cpp && ...` followed by an
  # unconditional `cd ..`, a failed `cd llama.cpp` would leave the script in
  # the parent directory for every later step.
  #
  # --outtype bf16 is passed explicitly because convert_hf_to_gguf.py
  # defaults to f16 output, which would not match the bf16 file name.
  if ! (
    cd llama.cpp \
      && python3 convert_hf_to_gguf.py ../ \
        --outtype bf16 \
        --outfile "../${INPUT_MODEL}"
  ); then
    # The file did not exist before this step, so anything present now is a
    # truncated result of the failed conversion; remove it so the next run
    # does not mistake it for a valid model.
    rm -f -- "$INPUT_MODEL"
    echo "Error: Failed to generate bf16 model $INPUT_MODEL" >&2
    exit 1
  fi

  # Check if bf16 generation was successful (converter exited 0 but wrote
  # nothing at the expected path)
  if [[ ! -f "$INPUT_MODEL" ]]; then
    echo "Error: Failed to generate bf16 model $INPUT_MODEL" >&2
    exit 1
  fi

  echo "Successfully generated $INPUT_MODEL"
fi
# Path to llama-quantize tool (corrected path)
QUANTIZE_TOOL="llama.cpp/build/bin/llama-quantize"

# Check that the quantize tool exists and can actually be executed; a file
# without the execute bit would otherwise fail once per format later on.
if [[ ! -f "$QUANTIZE_TOOL" || ! -x "$QUANTIZE_TOOL" ]]; then
  echo "Error: Quantize tool not found or not executable at $QUANTIZE_TOOL" >&2
  echo "Please build it first by running: cd llama.cpp && mkdir -p build && cd build && cmake .. && make llama-quantize" >&2
  exit 1
fi
# Process each quantization format

#######################################
# Quantize the input model into a single format.
# The tool writes to "<output>.partial", which is renamed to the final name
# only after the tool exits successfully, so a failed or interrupted run can
# never leave a truncated file that a later run would skip as "already
# exists".
# NOTE(review): outputs are named "osmosis-mcp-1.7b.*" while the input model
# is "osmosis-apply-1.7b-*" - looks like a copy-paste leftover; confirm the
# intended prefix before publishing the files.
# Globals:   QUANTIZE_TOOL (read), INPUT_MODEL (read)
# Arguments: $1 - quantization format name (e.g. Q4_K_M)
# Outputs:   progress messages on stdout, failure message on stderr
# Returns:   0 if the output exists or was created, 1 on failure
#######################################
quantize_one() {
  local format=$1
  local output_model="osmosis-mcp-1.7b.${format}.gguf"
  local partial_model="${output_model}.partial"

  # Check if output model already exists
  if [[ -f "$output_model" ]]; then
    echo "Model $output_model already exists. Skipping..."
    return 0
  fi

  # Run quantization
  echo "Quantizing to $format..."
  if "$QUANTIZE_TOOL" "$INPUT_MODEL" "$partial_model" "$format" \
    && mv -f -- "$partial_model" "$output_model"; then
    echo "Successfully created $output_model"
    return 0
  fi

  rm -f -- "$partial_model"
  echo "Failed to create $output_model" >&2
  return 1
}

# Formats whose quantization failed; drives the final message and exit code.
failed_formats=()

for format in "${QUANT_FORMATS[@]}"; do
  echo "------------------------------------------------------"
  echo "Starting quantization: $format"
  echo "------------------------------------------------------"

  # Keep going after a failure so one bad format does not block the rest.
  if ! quantize_one "$format"; then
    failed_formats+=("$format")
  fi

  echo ""
done

if (( ${#failed_formats[@]} > 0 )); then
  echo "Quantization finished with failures: ${failed_formats[*]}" >&2
else
  echo "All quantizations completed!"
fi

echo "Generated models:"
# compgen -G succeeds only if the glob matches, avoiding an `ls` error when
# no model was produced.
if compgen -G "osmosis-mcp-1.7b.*.gguf" > /dev/null; then
  ls -lah -- osmosis-mcp-1.7b.*.gguf
else
  echo "  (none)"
fi

# Report failure to the caller (CI, wrapper scripts) if any format failed.
if (( ${#failed_formats[@]} > 0 )); then
  exit 1
fi