From 8361dfc96ab7b03ece98700e7f581d1627bc6534 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 25 May 2026 16:16:16 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: NightPrince/Qwen3-4B-Islamic-Arabic-GGUF Source: Original Platform --- .gitattributes | 38 +++++++ README.md | 195 +++++++++++++++++++++++++++++++++++ qwen3-4b-islamic-f16.gguf | 3 + qwen3-4b-islamic-q4_k_m.gguf | 3 + qwen3-4b-islamic-q8_0.gguf | 3 + 5 files changed, 242 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 qwen3-4b-islamic-f16.gguf create mode 100644 qwen3-4b-islamic-q4_k_m.gguf create mode 100644 qwen3-4b-islamic-q8_0.gguf diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..47ee718 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,38 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs 
-text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +qwen3-4b-islamic-f16.gguf filter=lfs diff=lfs merge=lfs -text +qwen3-4b-islamic-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text +qwen3-4b-islamic-q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..53b91a9 --- /dev/null +++ b/README.md @@ -0,0 +1,195 @@ +--- +language: + - ar +license: apache-2.0 +library_name: gguf +base_model: NightPrince/Qwen3-4B-Islamic-Arabic +datasets: + - NightPrince/islamic-arabic-qa +tags: + - arabic + - islamic + - fiqh + - fatwa + - qwen3 + - gguf + - llama-cpp + - ollama + - quantized + - instruction-tuning +pipeline_tag: text-generation +--- + +# Qwen3-4B-Islamic-Arabic-GGUF + +**GGUF quantized versions of Qwen3-4B-Islamic-Arabic for llama.cpp, Ollama, and LM Studio.** + +This repository contains three GGUF files at different quantization levels, converted from [NightPrince/Qwen3-4B-Islamic-Arabic](https://huggingface.co/NightPrince/Qwen3-4B-Islamic-Arabic) (the merged FP16 model). All standard GGUF-compatible runtimes are supported: llama.cpp, Ollama, LM Studio, Jan, and others. + +Trained and converted by **[Yahya Alnwsany (NightPrince)](https://huggingface.co/NightPrince)** — 2026-05-05. 
+ +--- + +## Files + +| File | Size | Recommended for | +|---|---|---| +| `qwen3-4b-islamic-q4_k_m.gguf` | 2.3 GB | **Most users** — best quality/size balance | +| `qwen3-4b-islamic-q8_0.gguf` | 4.0 GB | High quality, more RAM available | +| `qwen3-4b-islamic-f16.gguf` | 7.5 GB | Reference / re-quantization source | + +**Recommendation**: Start with `q4_k_m`. If you have 6+ GB of RAM headroom and want noticeably sharper Arabic output, use `q8_0`. The `f16` file is the lossless reference and is best used as a source for producing custom quantizations with llama.cpp's `llama-quantize`. + +--- + +## Model Variants + +| Variant | Repo | Description | +|---|---|---| +| **Merged FP16** | [NightPrince/Qwen3-4B-Islamic-Arabic](https://huggingface.co/NightPrince/Qwen3-4B-Islamic-Arabic) | Canonical merged model, FP16, ~7.6 GB — drop-in for transformers or vLLM | +| **LoRA Adapter** | [NightPrince/Qwen3-4B-Islamic-Arabic-LoRA](https://huggingface.co/NightPrince/Qwen3-4B-Islamic-Arabic-LoRA) | PEFT adapter only, 264 MB — apply on top of `Qwen/Qwen3-4B` | +| **INT4 Quantized** | [NightPrince/Qwen3-4B-Islamic-Arabic-INT4](https://huggingface.co/NightPrince/Qwen3-4B-Islamic-Arabic-INT4) | W4A16 compressed-tensors for fast vLLM serving, 2.5 GB | +| **MLX 4-bit** | [NightPrince/Qwen3-4B-Islamic-Arabic-mlx-4Bit](https://huggingface.co/NightPrince/Qwen3-4B-Islamic-Arabic-mlx-4Bit) | Apple Silicon / MLX — native Mac inference, 4-bit quantized | +| **GGUF** (this model) | [NightPrince/Qwen3-4B-Islamic-Arabic-GGUF](https://huggingface.co/NightPrince/Qwen3-4B-Islamic-Arabic-GGUF) | llama.cpp / Ollama / LM Studio — Q4_K_M (2.3 GB), Q8_0 (4.0 GB), F16 (7.5 GB) | +| **Dataset** | [NightPrince/islamic-arabic-qa](https://huggingface.co/datasets/NightPrince/islamic-arabic-qa) | 17,944 train / 2,101 val / 1,042 test — Islamic Arabic Q&A pairs | + +--- + +## Usage + +### Ollama + +**Step 1: Create a Modelfile** + +Save the following as `Modelfile` (no extension) in the directory where you will download the GGUF file — the `FROM` path below is resolved relative to the Modelfile: + +``` +FROM 
./qwen3-4b-islamic-q4_k_m.gguf + +SYSTEM """أنت مساعد عالم إسلامي متخصص. أجب على الأسئلة بدقة استناداً إلى القرآن الكريم والسنة النبوية والفقه الإسلامي الكلاسيكي. استشهد بالمصادر حيثما أمكن. كن موجزاً لكن شاملاً.""" + +PARAMETER temperature 0.7 +PARAMETER top_p 0.9 +PARAMETER num_ctx 4096 +``` + +> Important: The `SYSTEM` field above contains the exact system prompt the model was fine-tuned with. Using it will produce the best results. + +**Step 2: Download the GGUF file** + +```bash +# Using huggingface-cli +pip install huggingface_hub +huggingface-cli download NightPrince/Qwen3-4B-Islamic-Arabic-GGUF \ + qwen3-4b-islamic-q4_k_m.gguf \ + --local-dir . +``` + +**Step 3: Build and run** + +```bash +# Create the Ollama model +ollama create qwen3-islamic -f Modelfile + +# Run interactively +ollama run qwen3-islamic + +# Or query via API +curl http://localhost:11434/api/generate -d '{ + "model": "qwen3-islamic", + "prompt": "ما حكم الاحتفال بالمولد النبوي الشريف؟", + "stream": false +}' +``` + +--- + +### llama.cpp + +**Build llama.cpp** (if not already installed): + +```bash +git clone https://github.com/ggerganov/llama.cpp +cd llama.cpp +make -j$(nproc) # CPU +# For CUDA: make GGML_CUDA=1 -j$(nproc) +``` + +**Download a GGUF file:** + +```bash +huggingface-cli download NightPrince/Qwen3-4B-Islamic-Arabic-GGUF \ + qwen3-4b-islamic-q4_k_m.gguf \ + --local-dir ./models +``` + +**Run the llama.cpp HTTP server:** + +```bash +./llama-server \ + --model ./models/qwen3-4b-islamic-q4_k_m.gguf \ + --ctx-size 4096 \ + --n-gpu-layers 99 \ + --host 0.0.0.0 \ + --port 8080 \ + --system-prompt "أنت مساعد عالم إسلامي متخصص. أجب على الأسئلة بدقة استناداً إلى القرآن الكريم والسنة النبوية والفقه الإسلامي الكلاسيكي. استشهد بالمصادر حيثما أمكن. كن موجزاً لكن شاملاً." +``` + +**CLI inference:** + +```bash +./llama-cli \ + --model ./models/qwen3-4b-islamic-q4_k_m.gguf \ + --ctx-size 4096 \ + --n-gpu-layers 99 \ + --chat-template qwen3 \ + --system-prompt "أنت مساعد عالم إسلامي متخصص. 
أجب على الأسئلة بدقة استناداً إلى القرآن الكريم والسنة النبوية والفقه الإسلامي الكلاسيكي. استشهد بالمصادر حيثما أمكن. كن موجزاً لكن شاملاً." \ + --prompt "ما هي أركان الإسلام الخمسة؟" \ + --n-predict 512 +``` + +--- + +### LM Studio + +1. Open **LM Studio** and go to the **Search** tab. +2. Search for `NightPrince/Qwen3-4B-Islamic-Arabic-GGUF`. +3. Download `qwen3-4b-islamic-q4_k_m.gguf` (recommended) from the file list. +4. Load the model and open the **Chat** tab. +5. In **System Prompt**, paste: + ``` + أنت مساعد عالم إسلامي متخصص. أجب على الأسئلة بدقة استناداً إلى القرآن الكريم والسنة النبوية والفقه الإسلامي الكلاسيكي. استشهد بالمصادر حيثما أمكن. كن موجزاً لكن شاملاً. + ``` +6. Set **Temperature** to `0.7` and **Context Length** to `4096` for best results. + +--- + +## Hardware Requirements + +| File | Min RAM (CPU) | Min VRAM (GPU offload) | +|---|---|---| +| `q4_k_m` (2.3 GB) | 4 GB | 3–4 GB | +| `q8_0` (4.0 GB) | 6 GB | 5–6 GB | +| `f16` (7.5 GB) | 10 GB | 8–10 GB | + +Use `--n-gpu-layers 99` in llama.cpp to offload all layers to GPU. Reduce the value if you run out of VRAM. + +--- + +## Citation + +```bibtex +@misc{alnwsany2026qwen3islamicarabic, + author = {Yahya Alnwsany}, + title = {Qwen3-4B-Islamic-Arabic: QLoRA Fine-Tuning of Qwen3-4B on Islamic Arabic Q\&A}, + year = {2026}, + howpublished = {\url{https://huggingface.co/NightPrince/Qwen3-4B-Islamic-Arabic}}, + note = {Base model: Qwen/Qwen3-4B. Dataset: NightPrince/islamic-arabic-qa.} +} +``` + +--- + +## License + +Apache 2.0 — consistent with the base model [Qwen/Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B). 
diff --git a/qwen3-4b-islamic-f16.gguf b/qwen3-4b-islamic-f16.gguf new file mode 100644 index 0000000..90fb1fc --- /dev/null +++ b/qwen3-4b-islamic-f16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894aab77eb0f938aa07078bca49ed36cc1df1c956626c02552c8bf004ca15bba +size 8051284800 diff --git a/qwen3-4b-islamic-q4_k_m.gguf b/qwen3-4b-islamic-q4_k_m.gguf new file mode 100644 index 0000000..9f36915 --- /dev/null +++ b/qwen3-4b-islamic-q4_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d907f52954a81597f0b744e7ce3c9dea9778c116fded0f4e8c3f39bf45b7834 +size 2497280320 diff --git a/qwen3-4b-islamic-q8_0.gguf b/qwen3-4b-islamic-q8_0.gguf new file mode 100644 index 0000000..3b848db --- /dev/null +++ b/qwen3-4b-islamic-q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d11109087fbfc6b07bee06e1884d6a71a906c2d56131a659d9f5f1c657ccaa0 +size 4280404800