86 lines
1.9 KiB
Bash
86 lines
1.9 KiB
Bash
#!/bin/bash
#
# Launch llama-cli on one of the GGUF models listed in MODELS.
#
# Usage: <script> [model_index] [prompt]
#   model_index  Index into MODELS (default: 0 when omitted or empty).
#   prompt       When given, run a single non-interactive generation of up
#                to 512 tokens; otherwise start an interactive conversation.
#
# Exit status: 1 on an invalid index or a missing model file, 127 when the
# CLI binary is not on PATH, otherwise the exit status of the CLI itself.

# For W[N]-A8-KV8, Apple CPU-only Inference: --n-gpu-layers 0
CLI=llama-cli
KV_CACHE_TYPE=q8_0

# Inference parameters for non-thinking mode
TEMPERATURE=0.6
MIN_P=0.00
REPEAT_PENALTY=1.0
PRESENCE_PENALTY=1.5
TOP_K=20
TOP_P=0.95

MODELS=(
  ./Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
  ./Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
  ./Qwen3-1.7B-EdgeRazor-Q4_0.gguf
  ./Qwen3-1.7B-BF16.gguf
)

# Highest valid index, derived from the list so the usage text and the
# range check cannot go stale when MODELS is edited.
MAX_INDEX=$(( ${#MODELS[@]} - 1 ))

# Show available model list
echo "Available models:"
for i in "${!MODELS[@]}"; do
  echo " $i) ${MODELS[$i]}"
done

# Select model (default to the first one)
if [[ -z "${1:-}" ]]; then
  echo ""
  echo "Usage: $0 <model_index> [prompt]"
  echo " model_index: 0-${MAX_INDEX} (default: 0)"
  echo " prompt: optional prompt for non-interactive mode"
  echo ""
  MODEL_INDEX=0
else
  MODEL_INDEX=$1
fi

# Array subscripts are evaluated as arithmetic expressions, so an unchecked
# value such as "abc" would silently select model 0 and other expressions
# would be evaluated too. Accept plain decimal digits only; the length cap
# keeps the comparison below clear of integer overflow.
if [[ ! "$MODEL_INDEX" =~ ^[0-9]{1,9}$ ]] || (( 10#$MODEL_INDEX > MAX_INDEX )); then
  echo "Error: model_index must be an integer between 0 and ${MAX_INDEX}, got: $MODEL_INDEX" >&2
  exit 1
fi
# Force base 10 so an input like "08" is not rejected as invalid octal.
MODEL_INDEX=$(( 10#$MODEL_INDEX ))

MODEL="${MODELS[$MODEL_INDEX]}"

if [[ ! -f "$MODEL" ]]; then
  echo "Error: Model file not found: $MODEL" >&2
  exit 1
fi

# Fail with a clear message instead of the shell's generic "command not found".
if ! command -v "$CLI" >/dev/null 2>&1; then
  echo "Error: $CLI not found in PATH" >&2
  exit 127
fi

echo "Selected model: $MODEL"
echo ""

# Flags shared by both modes, kept in one place so the two invocations
# cannot drift apart. Order matches the original command lines.
# NOTE(review): --flash-attn is passed as a bare switch; confirm that the
# installed llama-cli build does not expect a value for it.
COMMON_ARGS=(
  --model "$MODEL"
  --n-gpu-layers 0
  --cache-type-k "$KV_CACHE_TYPE"
  --cache-type-v "$KV_CACHE_TYPE"
  --temp "$TEMPERATURE"
  --min-p "$MIN_P"
  --repeat-penalty "$REPEAT_PENALTY"
  --presence-penalty "$PRESENCE_PENALTY"
  --top-k "$TOP_K"
  --top-p "$TOP_P"
  --flash-attn
)

# Run CLI
if [[ -z "${2:-}" ]]; then
  # Interactive mode
  "$CLI" "${COMMON_ARGS[@]}" \
    --conversation \
    --interactive-first \
    --color
else
  # Non-interactive mode (single inference)
  PROMPT="$2"
  "$CLI" "${COMMON_ARGS[@]}" \
    --prompt "$PROMPT" \
    --n-predict 512 \
    --color
fi