Files
ModelHub XC 9662933715 初始化项目,由ModelHub XC社区提供模型
Model: zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF
Source: Original Platform
2026-06-05 15:44:17 +08:00

86 lines
1.9 KiB
Bash

#!/bin/bash
# Launcher settings for W[N]-A8-KV8 models.
# Apple CPU-only inference: the CLI is started with --n-gpu-layers 0.

# Command used to run inference.
CLI="llama-cli"

# Cache type handed to both --cache-type-k and --cache-type-v.
KV_CACHE_TYPE="q8_0"

# Inference parameters for non-thinking mode.
# NOTE(review): temp 0.6 / top-p 0.95 look like the values the Qwen3 model
# card lists for *thinking* mode (its non-thinking suggestion is temp 0.7 /
# top-p 0.8) — confirm which mode is actually intended here.
TOP_K="20"
TOP_P="0.95"
MIN_P="0.00"
TEMPERATURE="0.6"
REPEAT_PENALTY="1.0"
PRESENCE_PENALTY="1.5"

# Selectable model files; the array position is the index given on the
# command line.
MODELS=(
  ./Qwen3-1.7B-EdgeRazor-TQ2_0.gguf   # index 0
  ./Qwen3-1.7B-EdgeRazor-TQ1_0.gguf   # index 1
  ./Qwen3-1.7B-EdgeRazor-Q4_0.gguf    # index 2
  ./Qwen3-1.7B-BF16.gguf              # index 3
)
# Show available model list
echo "Available models:"
for i in "${!MODELS[@]}"; do
  echo " $i) ${MODELS[$i]}"
done

# Highest selectable index, derived from the array so the usage text cannot
# drift out of sync with the model list.
MAX_MODEL_INDEX=$(( ${#MODELS[@]} - 1 ))

# Select model (default to the first one).
#   $1 - index into MODELS; when empty or missing, usage is printed and
#        index 0 is used.
if [ -z "$1" ]; then
  echo ""
  echo "Usage: $0 <model_index> [prompt]"
  echo " model_index: 0 to $MAX_MODEL_INDEX (default: 0)"
  echo " prompt: optional prompt for non-interactive mode"
  echo ""
  MODEL_INDEX=0
else
  MODEL_INDEX=$1
fi

# Validate by plain string comparison against the real indices. An array
# subscript is evaluated as an arithmetic expression, so an unchecked value
# such as "abc" would silently resolve to element 0, a negative value would
# index from the end, and crafted input could trigger command substitution.
MODEL_INDEX_OK=0
for i in "${!MODELS[@]}"; do
  if [ "$MODEL_INDEX" = "$i" ]; then
    MODEL_INDEX_OK=1
    break
  fi
done
if [ "$MODEL_INDEX_OK" -ne 1 ]; then
  echo "Error: Invalid model index: $MODEL_INDEX (expected 0 to $MAX_MODEL_INDEX)" >&2
  exit 1
fi

MODEL="${MODELS[$MODEL_INDEX]}"
# Exit with status 1 when the chosen model file is not a regular file;
# diagnostics go to stderr so they are not mixed with normal output.
if [ ! -f "$MODEL" ]; then
  echo "Error: Model file not found: $MODEL" >&2
  exit 1
fi
echo "Selected model: $MODEL"
echo ""
# Run CLI
# Options common to both modes, in the order they are passed to the CLI.
# NOTE(review): option syntax depends on the llama-cli build; newer llama.cpp
# releases appear to document --flash-attn as taking an on/off/auto value —
# confirm the bare flag matches the installed version.
llama_args=(
  --model "$MODEL"
  --n-gpu-layers 0
  --cache-type-k "$KV_CACHE_TYPE"
  --cache-type-v "$KV_CACHE_TYPE"
  --temp "$TEMPERATURE"
  --min-p "$MIN_P"
  --repeat-penalty "$REPEAT_PENALTY"
  --presence-penalty "$PRESENCE_PENALTY"
  --top-k "$TOP_K"
  --top-p "$TOP_P"
  --flash-attn
)

if [ -n "$2" ]; then
  # Non-interactive mode (single inference): a non-empty second argument is
  # used as the prompt and generation is capped at 512 tokens.
  PROMPT="$2"
  llama_args+=(--prompt "$PROMPT" --n-predict 512)
else
  # Interactive mode: no prompt given (or an empty one).
  llama_args+=(--conversation --interactive-first)
fi

# $CLI is left unquoted to keep the original word-splitting behaviour.
# The CLI's exit status becomes the script's exit status.
$CLI "${llama_args[@]}" --color