初始化项目,由ModelHub XC社区提供模型
Model: zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF Source: Original Platform
This commit is contained in:
86
cli.sh
Normal file
86
cli.sh
Normal file
@@ -0,0 +1,86 @@
|
||||
#!/bin/bash
#
# Launcher for llama-cli with a selectable GGUF model file.
# Usage: cli.sh <model_index> [prompt]
#
# For W[N]-A8-KV8, Apple CPU-only Inference: --n-gpu-layers 0

# Executable that is invoked; referenced by bare name, so it is looked up
# on PATH.
readonly CLI=llama-cli

# Value passed to both --cache-type-k and --cache-type-v.
readonly KV_CACHE_TYPE=q8_0

# Inference parameters for non-thinking mode
# NOTE(review): temp 0.6 / top-p 0.95 look like the values the Qwen3 model
# card lists for *thinking* mode (non-thinking is listed as 0.7 / 0.8) --
# confirm which mode is actually intended here.
readonly TEMPERATURE=0.6
readonly MIN_P=0.00
readonly REPEAT_PENALTY=1.0
readonly PRESENCE_PENALTY=1.5
readonly TOP_K=20
readonly TOP_P=0.95

# Selectable model files, addressed by their zero-based position in this
# list. Paths are relative to the current working directory.
readonly -a MODELS=(
  ./Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
  ./Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
  ./Qwen3-1.7B-EdgeRazor-Q4_0.gguf
  ./Qwen3-1.7B-BF16.gguf
)

# Show available model list
#
# Prints a heading followed by one "<index>) <path>" line per entry of the
# global MODELS array, in index order.
list_models() {
  local idx
  printf '%s\n' "Available models:"
  for idx in "${!MODELS[@]}"; do
    printf ' %s) %s\n' "$idx" "${MODELS[$idx]}"
  done
}

list_models

# Select model (default to the first one)

# is_valid_model_index CANDIDATE COUNT
# Succeeds only when CANDIDATE consists solely of decimal digits and its
# value is smaller than COUNT. Anything else (empty, negative, text) fails,
# so the value is never handed to arithmetic evaluation unchecked.
is_valid_model_index() {
  local candidate=$1
  local count=$2
  [[ "$candidate" =~ ^[0-9]+$ ]] || return 1
  # 10# forces base 10 so a zero-padded value such as "08" is not read as
  # an (invalid) octal literal.
  (( 10#$candidate < count ))
}

# Highest selectable index, derived from the list so the help text cannot
# drift from the actual number of models.
MAX_MODEL_INDEX=$(( ${#MODELS[@]} - 1 ))

if [[ -z "${1:-}" ]]; then
  # No index given: show the usage hint and fall back to the first model.
  echo ""
  echo "Usage: $0 <model_index> [prompt]"
  echo " model_index: 0-${MAX_MODEL_INDEX} (default: 0)"
  echo " prompt: optional prompt for non-interactive mode"
  echo ""
  MODEL_INDEX=0
else
  if ! is_valid_model_index "$1" "${#MODELS[@]}"; then
    echo "Error: invalid model index '$1' (expected 0-${MAX_MODEL_INDEX})" >&2
    exit 1
  fi
  # Normalise to a plain decimal number (drops any leading zeros).
  MODEL_INDEX=$(( 10#$1 ))
fi

# Look up the path for the chosen index. An index with no entry in MODELS
# expands to an empty string.
MODEL="${MODELS[$MODEL_INDEX]}"

# Report a missing list entry separately, so the user does not get a
# "file not found" message with a blank file name.
if [[ -z "$MODEL" ]]; then
  echo "Error: no model at index: $MODEL_INDEX" >&2
  exit 1
fi

# The path must refer to an existing regular file before the CLI is started.
if [[ ! -f "$MODEL" ]]; then
  echo "Error: Model file not found: $MODEL" >&2
  exit 1
fi

echo "Selected model: $MODEL"
echo ""

# Run CLI
#
# run_cli [PROMPT]
# Starts $CLI on $MODEL with the configured cache and sampling settings.
#   PROMPT empty or omitted -> interactive mode
#                              (--conversation --interactive-first)
#   PROMPT non-empty        -> single run with --prompt PROMPT and
#                              --n-predict 512
# Returns the exit status of the CLI process.
run_cli() {
  local prompt=${1:-}

  # Options shared by both modes live in one array, so the two modes cannot
  # drift apart and values containing spaces stay single arguments.
  local -a cli_args=(
    --model "$MODEL"
    --n-gpu-layers 0
    --cache-type-k "$KV_CACHE_TYPE"
    --cache-type-v "$KV_CACHE_TYPE"
    --temp "$TEMPERATURE"
    --min-p "$MIN_P"
    --repeat-penalty "$REPEAT_PENALTY"
    --presence-penalty "$PRESENCE_PENALTY"
    --top-k "$TOP_K"
    --top-p "$TOP_P"
    # NOTE(review): newer llama.cpp builds appear to expect a value
    # (on|off|auto) after --flash-attn -- confirm against the installed
    # llama-cli version.
    --flash-attn
  )

  if [[ -z "$prompt" ]]; then
    # Interactive mode
    cli_args+=(--conversation --interactive-first)
  else
    # Non-interactive mode (single inference)
    cli_args+=(--prompt "$prompt" --n-predict 512)
  fi
  cli_args+=(--color)

  "$CLI" "${cli_args[@]}"
}

run_cli "${2:-}"
Reference in New Issue
Block a user