#!/bin/bash
#
# Generate a bf16 GGUF model (if missing) and quantize it into a set of
# GGUF formats using llama.cpp's llama-quantize tool.
#
# Requires: llama.cpp checked out in ./llama.cpp with llama-quantize built
# under llama.cpp/build/bin/.

# Fail on unset variables and broken pipelines; per-format quantization
# failures are tolerated deliberately (see loop below), so no plain -e.
set -uo pipefail

# Input model file (corrected filename)
readonly INPUT_MODEL="osmosis-apply-1.7b-bf16.gguf"

# Quantization formats to generate
readonly QUANT_FORMATS=(
  "Q4_K_S" "Q5_K_M" "Q5_K_S" "Q6_K" "IQ4_XS" "Q8_0"
  "Q2_K" "Q3_K_L" "Q3_K_M" "Q3_K_S" "Q4_K_M"
)

# Path to llama-quantize tool (corrected path)
readonly QUANTIZE_TOOL="llama.cpp/build/bin/llama-quantize"

# Generate bf16 model if it doesn't exist
if [ ! -f "$INPUT_MODEL" ]; then
  echo "bf16 model not found. Generating $INPUT_MODEL..."
  # Run the conversion in a subshell so a failed 'cd' cannot leave us in
  # the wrong directory; reuse $INPUT_MODEL instead of hardcoding the name.
  (cd llama.cpp && python3 convert_hf_to_gguf.py ../ --outfile "../$INPUT_MODEL")

  # Check if bf16 generation was successful
  if [ ! -f "$INPUT_MODEL" ]; then
    echo "Error: Failed to generate bf16 model $INPUT_MODEL" >&2
    exit 1
  fi
  echo "Successfully generated $INPUT_MODEL"
fi

# Check if quantize tool exists
if [ ! -f "$QUANTIZE_TOOL" ]; then
  echo "Error: Quantize tool not found at $QUANTIZE_TOOL" >&2
  echo "Please build it first by running: cd llama.cpp && mkdir -p build && cd build && cmake .. && make llama-quantize" >&2
  exit 1
fi

# Process each quantization format; a single failed format does not abort
# the run, so the remaining formats still get produced.
for format in "${QUANT_FORMATS[@]}"; do
  echo "------------------------------------------------------"
  echo "Starting quantization: $format"
  echo "------------------------------------------------------"

  # Output filename with the exact format requested
  OUTPUT_MODEL="osmosis-mcp-1.7b.${format}.gguf"

  # Skip formats that were already generated on a previous run
  if [ -f "$OUTPUT_MODEL" ]; then
    echo "Model $OUTPUT_MODEL already exists. Skipping..."
    continue
  fi

  # Run quantization; test the command's status directly instead of $?
  echo "Quantizing to $format..."
  if "$QUANTIZE_TOOL" "$INPUT_MODEL" "$OUTPUT_MODEL" "$format"; then
    echo "Successfully created $OUTPUT_MODEL"
  else
    echo "Failed to create $OUTPUT_MODEL" >&2
  fi
  echo ""
done

echo "All quantizations completed!"
echo "Generated models:"
# Guard the listing so an empty glob doesn't print a confusing ls error
ls -lah osmosis-mcp-1.7b.*.gguf 2>/dev/null || echo "(none found)"