初始化项目,由ModelHub XC社区提供模型
Model: zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF Source: Original Platform
This commit is contained in:
40
.gitattributes
vendored
Normal file
40
.gitattributes
vendored
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
Qwen3-1.7B-BF16.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
Qwen3-1.7B-EdgeRazor-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
Qwen3-1.7B-EdgeRazor-TQ1_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
Qwen3-1.7B-EdgeRazor-TQ2_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
asset/Logo-HF.png filter=lfs diff=lfs merge=lfs -text
|
||||||
3
Qwen3-1.7B-BF16.gguf
Normal file
3
Qwen3-1.7B-BF16.gguf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d4ca7ef8959fa7e23e656b80e39a1f5400fc3f2d427133ee84dc2a57633b5a1d
|
||||||
|
size 4069679360
|
||||||
3
Qwen3-1.7B-EdgeRazor-Q4_0.gguf
Normal file
3
Qwen3-1.7B-EdgeRazor-Q4_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:a876b60cf705906a10fceac52427b54abdc5511ad7751929bb37d7590cc12439
|
||||||
|
size 1054423360
|
||||||
3
Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
Normal file
3
Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d665083d69b7c51bed4f16fbbeab196cd6fc0242aeca37092710a8f7f14a55d4
|
||||||
|
size 478748992
|
||||||
3
Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
Normal file
3
Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c7a8c4018eb0505d0a686110a135c311cdd952c0cf8361e7504513b733b1ca1c
|
||||||
|
size 544809280
|
||||||
81
README.md
Normal file
81
README.md
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
---
|
||||||
|
base_model: Qwen/Qwen3-1.7B
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
tags:
|
||||||
|
- qwen3
|
||||||
|
- edgerazor
|
||||||
|
- quantization
|
||||||
|
license: apache-2.0
|
||||||
|
license_link: https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE
|
||||||
|
---
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
<br/>
|
||||||
|
<img src="./asset/Logo-HF.png" alt="EdgeRazor Logo" width="60%">
|
||||||
|
<h3>
|
||||||
|
EdgeRazor for Lightweight LLMs
|
||||||
|
</h3>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<a href="https://arxiv.org/abs/2605.04062" target="_blank">
|
||||||
|
<img src="https://img.shields.io/badge/arXiv-EdgeRazor-b31b1b?style=flat&logo=arxiv" alt="arXiv EdgeRazor">
|
||||||
|
</a>
|
||||||
|
<a href="https://github.com/zhangsq-nju/EdgeRazor" target="_blank">
|
||||||
|
<img src="https://img.shields.io/badge/GitHub-EdgeRazor-blue?style=flat&logo=github" alt="GitHub EdgeRazor">
|
||||||
|
</a>
|
||||||
|
<a href="https://pypi.org/project/edgerazor/" target="_blank">
|
||||||
|
<img src="https://img.shields.io/pypi/v/edgerazor?style=flat&logo=pypi" alt="PyPI EdgeRazor">
|
||||||
|
</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
## Contents
|
||||||
|
|
||||||
|
- [Contents](#contents)
|
||||||
|
- [Model Overview](#model-overview)
|
||||||
|
- [Model Bit-Widths](#model-bit-widths)
|
||||||
|
- [Get Started](#get-started)
|
||||||
|
- [Citation](#citation)
|
||||||
|
|
||||||
|
## Model Overview
|
||||||
|
|
||||||
|
- Base Model: [Qwen/Qwen3-1.7B](https://huggingface.co/Qwen/Qwen3-1.7B)
|
||||||
|
- Training: [zhangsq-nju/EdgeRazor](https://github.com/zhangsq-nju/EdgeRazor)
|
||||||
|
- Inference: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
|
||||||
|
|
||||||
|
## Model Bit-Widths
|
||||||
|
|
||||||
|
| Mixed-Precision Recipe | Bit-Width | This Repo | GGUF Type |
|
||||||
|
| ---------------------------- | --------- | --------- | ------------- |
|
||||||
|
| 100% 4-bit + 0% 1.58-bit | 4 | ✔️ | Q4_0 |
|
||||||
|
| 50% 4-bit + 50% 1.58-bit | 2.79 | ✖️ | Not supported |
|
||||||
|
| 12.5% 4-bit + 87.5% 1.58-bit | 1.88 | ✖️ | Not supported |
|
||||||
|
| 0% 4-bit + 100% 1.58-bit | 1.58 | ✔️ | TQ1_0, TQ2_0 |
|
||||||
|
|
||||||
|
## Get Started
|
||||||
|
|
||||||
|
Use llama.cpp to conduct efficient inference on edge devices.
|
||||||
|
|
||||||
|
Check the [cli.sh](./cli.sh) script for basic usage.
|
||||||
|
|
||||||
|
Model list:
|
||||||
|
|
||||||
|
- `Qwen3-1.7B-BF16.gguf`: BF16 model from the original Qwen3-1.7B
|
||||||
|
- `Qwen3-1.7B-EdgeRazor-Q4_0.gguf`: Q4_0 model from [Qwen3-1.7B-EdgeRazor-4bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-4bit)
|
||||||
|
- `Qwen3-1.7B-EdgeRazor-TQ1_0.gguf`: TQ1_0 model from [Qwen3-1.7B-EdgeRazor-1.58bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-1.58bit)
|
||||||
|
- `Qwen3-1.7B-EdgeRazor-TQ2_0.gguf`: TQ2_0 model from [Qwen3-1.7B-EdgeRazor-1.58bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-1.58bit)
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
|
||||||
|
If you find our project useful in your research, please consider citing our paper ✏️:
|
||||||
|
|
||||||
|
```
|
||||||
|
@article{zhangsh-edgerazor,
|
||||||
|
title={{EdgeRazor}: A Lightweight Framework for Large Language Models via Mixed-Precision Quantization-Aware Distillation},
|
||||||
|
author={Shu-Hao Zhang and Le-Tong Huang and Xiang-Sheng Deng and Xin-Yi Zou and Chen Wu and Nan Li and Shao-Qun Zhang},
|
||||||
|
year={2026},
|
||||||
|
journal={arXiv preprint arXiv:2605.04062}
|
||||||
|
}
|
||||||
|
```
|
||||||
3
asset/Logo-HF.png
Normal file
3
asset/Logo-HF.png
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:fba2b7d652f19541cdcaf68c7c6c14e3e92f5c651758d9f08cb7178b195c2a91
|
||||||
|
size 617767
|
||||||
38
asset/Logo-HF.svg
Normal file
38
asset/Logo-HF.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 159 KiB |
86
cli.sh
Normal file
86
cli.sh
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
#!/bin/bash
#
# Run llama.cpp's llama-cli on one of the GGUF models listed in MODELS.
#
# Usage: cli.sh [model_index] [prompt]
#   model_index  Index into MODELS (default: 0 when omitted or empty).
#   prompt       Optional. When given, a single non-interactive generation
#                is run; otherwise an interactive conversation is started.
#
# For W[N]-A8-KV8, Apple CPU-only Inference: --n-gpu-layers 0

set -u

CLI=llama-cli
KV_CACHE_TYPE=q8_0

# Inference parameters for non-thinking mode
# NOTE(review): temp 0.6 / top-p 0.95 look like the values the Qwen3 model
# card recommends for *thinking* mode - confirm which mode is intended.
TEMPERATURE=0.6
MIN_P=0.00
REPEAT_PENALTY=1.0
PRESENCE_PENALTY=1.5
TOP_K=20
TOP_P=0.95

MODELS=(
  ./Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
  ./Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
  ./Qwen3-1.7B-EdgeRazor-Q4_0.gguf
  ./Qwen3-1.7B-BF16.gguf
)
# Highest valid index; derived from MODELS so the usage text and the range
# check below cannot drift out of sync with the list.
LAST_INDEX=$(( ${#MODELS[@]} - 1 ))

# Show available model list
echo "Available models:"
for i in "${!MODELS[@]}"; do
  echo " $i) ${MODELS[$i]}"
done

# Select model (default to the first one)
if [[ -z "${1:-}" ]]; then
  echo ""
  echo "Usage: $0 <model_index> [prompt]"
  echo " model_index: 0 to ${LAST_INDEX} (default: 0)"
  echo " prompt: optional prompt for non-interactive mode"
  echo ""
  MODEL_INDEX=0
else
  MODEL_INDEX=$1
fi

# Accept only a plain decimal index that is in range. Array subscripts are
# evaluated as arithmetic, so an unchecked value could be interpreted as an
# expression instead of a number. The 10# prefix keeps inputs such as "08"
# from being parsed as (invalid) octal.
if [[ ! "$MODEL_INDEX" =~ ^[0-9]{1,9}$ ]] \
  || (( 10#$MODEL_INDEX > LAST_INDEX )); then
  echo "Error: Invalid model index: $MODEL_INDEX (expected 0 to ${LAST_INDEX})" >&2
  exit 1
fi
MODEL_INDEX=$(( 10#$MODEL_INDEX ))

MODEL="${MODELS[MODEL_INDEX]}"

if [[ ! -f "$MODEL" ]]; then
  echo "Error: Model file not found: $MODEL" >&2
  exit 1
fi

echo "Selected model: $MODEL"
echo ""

if ! command -v "$CLI" >/dev/null 2>&1; then
  echo "Error: $CLI not found in PATH" >&2
  exit 127
fi

# Arguments shared by both run modes.
# NOTE(review): newer llama.cpp builds may expect a value (on|off|auto)
# after --flash-attn - confirm against the installed llama-cli version.
COMMON_ARGS=(
  --model "$MODEL"
  --n-gpu-layers 0
  --cache-type-k "$KV_CACHE_TYPE"
  --cache-type-v "$KV_CACHE_TYPE"
  --temp "$TEMPERATURE"
  --min-p "$MIN_P"
  --repeat-penalty "$REPEAT_PENALTY"
  --presence-penalty "$PRESENCE_PENALTY"
  --top-k "$TOP_K"
  --top-p "$TOP_P"
  --flash-attn
)

# Run CLI
if [[ -z "${2:-}" ]]; then
  # Interactive mode
  MODE_ARGS=(--conversation --interactive-first)
else
  # Non-interactive mode (single inference)
  PROMPT="$2"
  MODE_ARGS=(--prompt "$PROMPT" --n-predict 512)
fi

"$CLI" "${COMMON_ARGS[@]}" "${MODE_ARGS[@]}" --color
|
||||||
Reference in New Issue
Block a user