初始化项目,由ModelHub XC社区提供模型
Model: zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF Source: Original Platform
This commit is contained in:
40
.gitattributes
vendored
Normal file
40
.gitattributes
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen3-1.7B-BF16.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen3-1.7B-EdgeRazor-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen3-1.7B-EdgeRazor-TQ1_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen3-1.7B-EdgeRazor-TQ2_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
asset/Logo-HF.png filter=lfs diff=lfs merge=lfs -text
|
||||
3
Qwen3-1.7B-BF16.gguf
Normal file
3
Qwen3-1.7B-BF16.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d4ca7ef8959fa7e23e656b80e39a1f5400fc3f2d427133ee84dc2a57633b5a1d
|
||||
size 4069679360
|
||||
3
Qwen3-1.7B-EdgeRazor-Q4_0.gguf
Normal file
3
Qwen3-1.7B-EdgeRazor-Q4_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a876b60cf705906a10fceac52427b54abdc5511ad7751929bb37d7590cc12439
|
||||
size 1054423360
|
||||
3
Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
Normal file
3
Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d665083d69b7c51bed4f16fbbeab196cd6fc0242aeca37092710a8f7f14a55d4
|
||||
size 478748992
|
||||
3
Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
Normal file
3
Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c7a8c4018eb0505d0a686110a135c311cdd952c0cf8361e7504513b733b1ca1c
|
||||
size 544809280
|
||||
81
README.md
Normal file
81
README.md
Normal file
@@ -0,0 +1,81 @@
|
||||
---
|
||||
base_model: Qwen/Qwen3-1.7B
|
||||
pipeline_tag: text-generation
|
||||
tags:
|
||||
- qwen3
|
||||
- edgerazor
|
||||
- quantization
|
||||
license: apache-2.0
|
||||
license_link: https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<br/>
|
||||
<img src="./asset/Logo-HF.png" alt="EdgeRazor Logo" width="60%">
|
||||
<h3>
|
||||
EdgeRazor for Lightweight LLMs
|
||||
</h3>
|
||||
|
||||
<p>
|
||||
<a href="https://arxiv.org/abs/2605.04062" target="_blank">
|
||||
<img src="https://img.shields.io/badge/arXiv-EdgeRazor-b31b1b?style=flat&logo=arxiv" alt="arXiv EdgeRazor">
|
||||
</a>
|
||||
<a href="https://github.com/zhangsq-nju/EdgeRazor" target="_blank">
|
||||
<img src="https://img.shields.io/badge/GitHub-EdgeRazor-blue?style=flat&logo=github" alt="GitHub EdgeRazor">
|
||||
</a>
|
||||
<a href="https://pypi.org/project/edgerazor/" target="_blank">
|
||||
<img src="https://img.shields.io/pypi/v/edgerazor?style=flat&logo=pypi" alt="PyPI EdgeRazor">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
## Contents
|
||||
|
||||
- [Contents](#contents)
|
||||
- [Model Overview](#model-overview)
|
||||
- [Model Bit-Widths](#model-bit-widths)
|
||||
- [Get Started](#get-started)
|
||||
- [Citation](#citation)
|
||||
|
||||
## Model Overview
|
||||
|
||||
- Base Model: [Qwen/Qwen3-1.7B](https://huggingface.co/Qwen/Qwen3-1.7B)
|
||||
- Training: [zhangsq-nju/EdgeRazor](https://github.com/zhangsq-nju/EdgeRazor)
|
||||
- Inference: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
|
||||
|
||||
## Model Bit-Widths
|
||||
|
||||
| Mixed-Precision Recipe | Bit-Width | This Repo | GGUF Type |
|
||||
| ---------------------------- | --------- | --------- | ------------- |
|
||||
| 100% 4-bit + 0% 1.58-bit | 4 | ✔️ | Q4_0 |
|
||||
| 50% 4-bit + 50% 1.58-bit | 2.79 | ✖️ | Not supported |
|
||||
| 12.5% 4-bit + 87.5% 1.58-bit | 1.88 | ✖️ | Not supported |
|
||||
| 0% 4-bit + 100% 1.58-bit | 1.58 | ✔️ | TQ1_0, TQ2_0 |
|
||||
|
||||
## Get Started
|
||||
|
||||
Use llama.cpp to conduct efficient inference on edge devices.
|
||||
|
||||
Check the [cli.sh](./cli.sh) script for basic usage.
|
||||
|
||||
Model list:
|
||||
|
||||
- `Qwen3-1.7B-BF16.gguf`: BF16 model from the original Qwen3-1.7B
|
||||
- `Qwen3-1.7B-EdgeRazor-Q4_0.gguf`: Q4_0 model from [Qwen3-1.7B-EdgeRazor-4bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-4bit)
|
||||
- `Qwen3-1.7B-EdgeRazor-TQ1_0.gguf`: TQ1_0 model from [Qwen3-1.7B-EdgeRazor-1.58bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-1.58bit)
|
||||
- `Qwen3-1.7B-EdgeRazor-TQ2_0.gguf`: TQ2_0 model from [Qwen3-1.7B-EdgeRazor-1.58bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-1.58bit)
|
||||
|
||||
## Citation
|
||||
|
||||
If you find our project useful in your research, please consider citing our paper ✏️:
|
||||
|
||||
```
|
||||
@article{zhangsh-edgerazor,
|
||||
title={{EdgeRazor}: A Lightweight Framework for Large Language Models via Mixed-Precision Quantization-Aware Distillation},
|
||||
author={Shu-Hao Zhang and Le-Tong Huang and Xiang-Sheng Deng and Xin-Yi Zou and Chen Wu and Nan Li and Shao-Qun Zhang},
|
||||
year={2026},
|
||||
journal={arXiv preprint arXiv:2605.04062}
|
||||
}
|
||||
```
|
||||
3
asset/Logo-HF.png
Normal file
3
asset/Logo-HF.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fba2b7d652f19541cdcaf68c7c6c14e3e92f5c651758d9f08cb7178b195c2a91
|
||||
size 617767
|
||||
38
asset/Logo-HF.svg
Normal file
38
asset/Logo-HF.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 159 KiB |
86
cli.sh
Normal file
86
cli.sh
Normal file
@@ -0,0 +1,86 @@
|
||||
#!/bin/bash
# Launch llama.cpp's CLI on one of the GGUF models listed in MODELS.
#
# Usage: cli.sh [model_index] [prompt]
#   model_index  Index into MODELS (default: 0).
#   prompt       Optional. When given, run a single non-interactive generation
#                capped at 512 tokens; otherwise start an interactive chat.
#
# Model paths are relative, so run this script from the directory that
# contains the .gguf files.
#
# For W[N]-A8-KV8, Apple CPU-only Inference: --n-gpu-layers 0

set -euo pipefail

CLI=llama-cli
KV_CACHE_TYPE=q8_0

# Inference parameters for non-thinking mode
# NOTE(review): temp 0.6 / top-p 0.95 look like the values the upstream Qwen3
# model card recommends for *thinking* mode — confirm which mode is intended.
TEMPERATURE=0.6
MIN_P=0.00
REPEAT_PENALTY=1.0
PRESENCE_PENALTY=1.5
TOP_K=20
TOP_P=0.95

MODELS=(
  ./Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
  ./Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
  ./Qwen3-1.7B-EdgeRazor-Q4_0.gguf
  ./Qwen3-1.7B-BF16.gguf
)
# Highest valid index; derived from the array so the usage text and the
# range check cannot drift out of sync with the model list.
MAX_INDEX=$(( ${#MODELS[@]} - 1 ))

# Show available model list
echo "Available models:"
for i in "${!MODELS[@]}"; do
  echo " $i) ${MODELS[$i]}"
done

# Select model (default to the first one)
MODEL_INDEX="${1:-}"
if [[ -z "$MODEL_INDEX" ]]; then
  echo ""
  echo "Usage: $0 <model_index> [prompt]"
  echo " model_index: 0 to $MAX_INDEX (default: 0)"
  echo " prompt: optional prompt for non-interactive mode"
  echo ""
  MODEL_INDEX=0
fi

# Array subscripts are evaluated arithmetically, so an unchecked argument
# could silently resolve to index 0 (any non-numeric word), wrap around
# (negative numbers) or select nothing (out of range). Accept plain decimal
# digits only; 10# keeps inputs such as "08" from being parsed as octal.
if [[ ! "$MODEL_INDEX" =~ ^[0-9]{1,9}$ ]] || (( 10#$MODEL_INDEX > MAX_INDEX )); then
  echo "Error: Invalid model index '$MODEL_INDEX' (expected 0 to $MAX_INDEX)" >&2
  exit 1
fi

MODEL="${MODELS[10#$MODEL_INDEX]}"

if [[ ! -f "$MODEL" ]]; then
  echo "Error: Model file not found: $MODEL" >&2
  exit 1
fi

echo "Selected model: $MODEL"
echo ""

# Flags shared by both run modes.
# NOTE(review): some llama.cpp builds expect a value after --flash-attn
# (on|off|auto) — verify against the installed llama-cli version.
COMMON_ARGS=(
  --model "$MODEL"
  --n-gpu-layers 0
  --cache-type-k "$KV_CACHE_TYPE"
  --cache-type-v "$KV_CACHE_TYPE"
  --temp "$TEMPERATURE"
  --min-p "$MIN_P"
  --repeat-penalty "$REPEAT_PENALTY"
  --presence-penalty "$PRESENCE_PENALTY"
  --top-k "$TOP_K"
  --top-p "$TOP_P"
  --flash-attn
)

# Run CLI
PROMPT="${2:-}"
if [[ -z "$PROMPT" ]]; then
  # Interactive mode
  MODE_ARGS=(--conversation --interactive-first --color)
else
  # Non-interactive mode (single inference)
  MODE_ARGS=(--prompt "$PROMPT" --n-predict 512 --color)
fi

"$CLI" "${COMMON_ARGS[@]}" "${MODE_ARGS[@]}"
|
||||
Reference in New Issue
Block a user