From 9662933715e16b0bb60ceb1e279ce4e56aed6eb0 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 5 Jun 2026 15:44:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF Source: Original Platform --- .gitattributes | 40 +++++++++++++++ Qwen3-1.7B-BF16.gguf | 3 ++ Qwen3-1.7B-EdgeRazor-Q4_0.gguf | 3 ++ Qwen3-1.7B-EdgeRazor-TQ1_0.gguf | 3 ++ Qwen3-1.7B-EdgeRazor-TQ2_0.gguf | 3 ++ README.md | 81 +++++++++++++++++++++++++++++++ asset/Logo-HF.png | 3 ++ asset/Logo-HF.svg | 38 +++++++++++++++ cli.sh | 86 +++++++++++++++++++++++++++++++++ params | 14 ++++++ 10 files changed, 274 insertions(+) create mode 100644 .gitattributes create mode 100644 Qwen3-1.7B-BF16.gguf create mode 100644 Qwen3-1.7B-EdgeRazor-Q4_0.gguf create mode 100644 Qwen3-1.7B-EdgeRazor-TQ1_0.gguf create mode 100644 Qwen3-1.7B-EdgeRazor-TQ2_0.gguf create mode 100644 README.md create mode 100644 asset/Logo-HF.png create mode 100644 asset/Logo-HF.svg create mode 100644 cli.sh create mode 100644 params diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6154d89 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,40 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs 
merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Qwen3-1.7B-BF16.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-1.7B-EdgeRazor-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-1.7B-EdgeRazor-TQ1_0.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-1.7B-EdgeRazor-TQ2_0.gguf filter=lfs diff=lfs merge=lfs -text +asset/Logo-HF.png filter=lfs diff=lfs merge=lfs -text diff --git a/Qwen3-1.7B-BF16.gguf b/Qwen3-1.7B-BF16.gguf new file mode 100644 index 0000000..7e25de9 --- /dev/null +++ b/Qwen3-1.7B-BF16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ca7ef8959fa7e23e656b80e39a1f5400fc3f2d427133ee84dc2a57633b5a1d +size 4069679360 diff --git a/Qwen3-1.7B-EdgeRazor-Q4_0.gguf b/Qwen3-1.7B-EdgeRazor-Q4_0.gguf new file mode 100644 index 0000000..e377fdf --- /dev/null +++ b/Qwen3-1.7B-EdgeRazor-Q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a876b60cf705906a10fceac52427b54abdc5511ad7751929bb37d7590cc12439 +size 1054423360 diff --git a/Qwen3-1.7B-EdgeRazor-TQ1_0.gguf b/Qwen3-1.7B-EdgeRazor-TQ1_0.gguf new file mode 100644 index 
0000000..1f9b9ff --- /dev/null +++ b/Qwen3-1.7B-EdgeRazor-TQ1_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d665083d69b7c51bed4f16fbbeab196cd6fc0242aeca37092710a8f7f14a55d4 +size 478748992 diff --git a/Qwen3-1.7B-EdgeRazor-TQ2_0.gguf b/Qwen3-1.7B-EdgeRazor-TQ2_0.gguf new file mode 100644 index 0000000..07d56b3 --- /dev/null +++ b/Qwen3-1.7B-EdgeRazor-TQ2_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a8c4018eb0505d0a686110a135c311cdd952c0cf8361e7504513b733b1ca1c +size 544809280 diff --git a/README.md b/README.md new file mode 100644 index 0000000..5603a4d --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +base_model: Qwen/Qwen3-1.7B +pipeline_tag: text-generation +tags: +- qwen3 +- edgerazor +- quantization +license: apache-2.0 +license_link: https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE +--- + +
+
+ EdgeRazor Logo +

+ EdgeRazor for Lightweight LLMs +

+ +

+ + arXiv EdgeRazor + + + GitHub EdgeRazor + + + PyPI EdgeRazor + +

+ + +
+ +## Contents + +- [Contents](#contents) +- [Model Overview](#model-overview) +- [Model Bit-Widths](#model-bit-widths) +- [Get Started](#get-started) +- [Citation](#citation) + +## Model Overview + +- Base Model: [Qwen/Qwen3-1.7B](https://huggingface.co/Qwen/Qwen3-1.7B) +- Training: [zhangsq-nju/EdgeRazor](https://github.com/zhangsq-nju/EdgeRazor) +- Inference: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp) + +## Model Bit-Widths + +| Mixed-Precision Recipe | Bit-Width | This Repo | GGUF Type | +| ---------------------------- | --------- | --------- | ------------- | +| 100% 4-bit + 0% 1.58-bit | 4 | ✔️ | Q4_0 | +| 50% 4-bit + 50% 1.58-bit | 2.79 | ✖️ | Not supported | +| 12.5% 4-bit + 87.5% 1.58-bit | 1.88 | ✖️ | Not supported | +| 0% 4-bit + 100% 1.58-bit | 1.58 | ✔️ | TQ1_0, TQ2_0 | + +## Get Started + +Use llama.cpp to run efficient inference on edge devices. + +Check the [cli.sh](./cli.sh) script for basic usage. + +Model list: + +- `Qwen3-1.7B-BF16.gguf`: BF16 model from the original Qwen3-1.7B +- `Qwen3-1.7B-EdgeRazor-Q4_0.gguf`: Q4_0 model from [Qwen3-1.7B-EdgeRazor-4bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-4bit) +- `Qwen3-1.7B-EdgeRazor-TQ1_0.gguf`: TQ1_0 model from [Qwen3-1.7B-EdgeRazor-1.58bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-1.58bit) +- `Qwen3-1.7B-EdgeRazor-TQ2_0.gguf`: TQ2_0 model from [Qwen3-1.7B-EdgeRazor-1.58bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-1.58bit) + +## Citation + +If you find our project useful in your research, please consider citing our paper ✏️: + +``` +@article{zhangsh-edgerazor, + title={{EdgeRazor}: A Lightweight Framework for Large Language Models via Mixed-Precision Quantization-Aware Distillation}, + author={Shu-Hao Zhang and Le-Tong Huang and Xiang-Sheng Deng and Xin-Yi Zou and Chen Wu and Nan Li and Shao-Qun Zhang}, + year={2026}, + journal={arXiv preprint arXiv:2605.04062} +} +``` \ No newline at end of file diff --git 
a/asset/Logo-HF.png b/asset/Logo-HF.png
new file mode 100644
index 0000000..2eef44e
--- /dev/null
+++ b/asset/Logo-HF.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fba2b7d652f19541cdcaf68c7c6c14e3e92f5c651758d9f08cb7178b195c2a91
+size 617767
diff --git a/asset/Logo-HF.svg b/asset/Logo-HF.svg
new file mode 100644
index 0000000..74ad86a
--- /dev/null
+++ b/asset/Logo-HF.svg
@@ -0,0 +1,38 @@
+ + + + + + 16-bit n-bit + +
\ No newline at end of file
diff --git a/cli.sh b/cli.sh
new file mode 100644
index 0000000..3810e40
--- /dev/null
+++ b/cli.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Run a Qwen3-1.7B EdgeRazor GGUF model through llama.cpp's llama-cli.
+# Usage: cli.sh [model_index] [prompt]  (no prompt => interactive chat)
+# For W[N]-A8-KV8, Apple CPU-only Inference: --n-gpu-layers 0
+
+set -euo pipefail
+
+CLI=llama-cli
+KV_CACHE_TYPE=q8_0
+
+# Sampling parameters. NOTE(review): temp 0.6 / top-p 0.95 / top-k 20 look like
+# the Qwen3 model card's thinking-mode values (non-thinking: 0.7 / 0.8 / 20);
+# this block used to be labelled "non-thinking mode" -- confirm the intent.
+TEMPERATURE=0.6
+MIN_P=0.00
+REPEAT_PENALTY=1.0
+PRESENCE_PENALTY=1.5
+TOP_K=20
+TOP_P=0.95
+
+MODELS=(
+  ./Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
+  ./Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
+  ./Qwen3-1.7B-EdgeRazor-Q4_0.gguf
+  ./Qwen3-1.7B-BF16.gguf
+)
+MAX_INDEX=$(( ${#MODELS[@]} - 1 ))
+
+# Print an error message to stderr and exit with status 1.
+die() { echo "Error: $*" >&2; exit 1; }
+
+# Show available model list
+echo "Available models:"
+for i in "${!MODELS[@]}"; do
+  echo "  $i) ${MODELS[$i]}"
+done
+
+# Select model (default: first one; an empty argument counts as missing)
+MODEL_INDEX="${1:-}"
+if [[ -z "$MODEL_INDEX" ]]; then
+  echo ""
+  echo "Usage: $0 [model_index] [prompt]"
+  echo "  model_index: 0 to $MAX_INDEX (default: 0)"
+  echo "  prompt: optional prompt for non-interactive mode"
+  echo ""
+  MODEL_INDEX=0
+fi
+
+# Accept only a plain in-range decimal index: array subscripts are evaluated
+# arithmetically, so "foo" would silently pick model 0 and "9" an empty path.
+if ! [[ "$MODEL_INDEX" =~ ^[0-9]+$ ]] || (( 10#$MODEL_INDEX > MAX_INDEX )); then
+  die "Invalid model index '$MODEL_INDEX' (expected 0 to $MAX_INDEX)"
+fi
+MODEL_INDEX=$(( 10#$MODEL_INDEX ))  # force base 10 so "08" is not read as octal
+MODEL="${MODELS[$MODEL_INDEX]}"
+
+[[ -f "$MODEL" ]] || die "Model file not found: $MODEL"
+command -v "$CLI" >/dev/null 2>&1 || die "'$CLI' not found in PATH"
+
+echo "Selected model: $MODEL"
+echo ""
+
+# Options shared by the interactive and the single-prompt invocation
+COMMON_ARGS=(
+  --model "$MODEL"
+  --n-gpu-layers 0
+  --cache-type-k "$KV_CACHE_TYPE"
+  --cache-type-v "$KV_CACHE_TYPE"
+  --temp "$TEMPERATURE"
+  --min-p "$MIN_P"
+  --repeat-penalty "$REPEAT_PENALTY"
+  --presence-penalty "$PRESENCE_PENALTY"
+  --top-k "$TOP_K"
+  --top-p "$TOP_P"
+  --flash-attn
+)
+
+# Run CLI
+PROMPT="${2:-}"
+if [[ -z "$PROMPT" ]]; then
+  # Interactive mode
+  "$CLI" "${COMMON_ARGS[@]}" --conversation --interactive-first --color
+else
+  # Non-interactive mode (single inference)
+  "$CLI" "${COMMON_ARGS[@]}" --prompt "$PROMPT" --n-predict 512 --color
+fi
\ No newline at end of file
diff --git a/params b/params
new file mode 100644
index 0000000..5f68460
--- /dev/null
+++ b/params
@@ -0,0 +1,14 @@
+{
+    "stop": [
+        "<|im_start|>",
+        "<|im_end|>"
+    ],
+    "temperature": 0.6,
+    "min_p" : 0.00,
+    "repeat_penalty" : 1.0,
+    "presence_penalty" : 1.5,
+    "top_k" : 20,
+    "top_p" : 0.95,
+    "num_predict" : 32768,
+    "num_ctx": 40960
+}
\ No newline at end of file