From a1193d64c45d6d9b26d80613c880ee82eea183c6 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 16 Jun 2026 17:16:24 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: second-state/Phi-4-mini-reasoning-GGUF Source: Original Platform --- .gitattributes | 48 +++++++++++ Phi-4-mini-reasoning-Q2_K.gguf | 3 + Phi-4-mini-reasoning-Q3_K_L.gguf | 3 + Phi-4-mini-reasoning-Q3_K_M.gguf | 3 + Phi-4-mini-reasoning-Q3_K_S.gguf | 3 + Phi-4-mini-reasoning-Q4_0.gguf | 3 + Phi-4-mini-reasoning-Q4_K_M.gguf | 3 + Phi-4-mini-reasoning-Q4_K_S.gguf | 3 + Phi-4-mini-reasoning-Q5_0.gguf | 3 + Phi-4-mini-reasoning-Q5_K_M.gguf | 3 + Phi-4-mini-reasoning-Q5_K_S.gguf | 3 + Phi-4-mini-reasoning-Q6_K.gguf | 3 + Phi-4-mini-reasoning-Q8_0.gguf | 3 + Phi-4-mini-reasoning-f16.gguf | 3 + README.md | 82 ++++++++++++++++++ config.json | 138 +++++++++++++++++++++++++++++++ 16 files changed, 307 insertions(+) create mode 100644 .gitattributes create mode 100644 Phi-4-mini-reasoning-Q2_K.gguf create mode 100644 Phi-4-mini-reasoning-Q3_K_L.gguf create mode 100644 Phi-4-mini-reasoning-Q3_K_M.gguf create mode 100644 Phi-4-mini-reasoning-Q3_K_S.gguf create mode 100644 Phi-4-mini-reasoning-Q4_0.gguf create mode 100644 Phi-4-mini-reasoning-Q4_K_M.gguf create mode 100644 Phi-4-mini-reasoning-Q4_K_S.gguf create mode 100644 Phi-4-mini-reasoning-Q5_0.gguf create mode 100644 Phi-4-mini-reasoning-Q5_K_M.gguf create mode 100644 Phi-4-mini-reasoning-Q5_K_S.gguf create mode 100644 Phi-4-mini-reasoning-Q6_K.gguf create mode 100644 Phi-4-mini-reasoning-Q8_0.gguf create mode 100644 Phi-4-mini-reasoning-f16.gguf create mode 100644 README.md create mode 100644 config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..8e312d0 --- /dev/null +++ 
b/.gitattributes @@ -0,0 +1,48 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs 
-text +Phi-4-mini-reasoning-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +Phi-4-mini-reasoning-f16.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Phi-4-mini-reasoning-Q2_K.gguf b/Phi-4-mini-reasoning-Q2_K.gguf new file mode 100644 index 0000000..98bfe6e --- /dev/null +++ b/Phi-4-mini-reasoning-Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20acd09ffeca78664d760b259338a2da6a6e488ba12eff9f6a8f20d32fdcaeb5 +size 1682636032 diff --git a/Phi-4-mini-reasoning-Q3_K_L.gguf b/Phi-4-mini-reasoning-Q3_K_L.gguf new file mode 100644 index 0000000..b570ebf --- /dev/null +++ b/Phi-4-mini-reasoning-Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ecb08bf01dd2eca699018f72e356da18114b1ee71a896c0ed1480370d33ef5b +size 2249653504 diff --git a/Phi-4-mini-reasoning-Q3_K_M.gguf b/Phi-4-mini-reasoning-Q3_K_M.gguf new file mode 100644 index 0000000..b083e87 --- /dev/null +++ b/Phi-4-mini-reasoning-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5daef984c39dfcde342bcf4bb0ee96b434a86d2ae1a6102d70b49028534f94e2 +size 2117532928 diff --git a/Phi-4-mini-reasoning-Q3_K_S.gguf b/Phi-4-mini-reasoning-Q3_K_S.gguf new file mode 100644 index 0000000..43e08fc --- /dev/null +++ b/Phi-4-mini-reasoning-Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f7efed5d9574074697d2dde2024fc6bd8b251a458bfd3eb89fd267a86f38529 +size 1897331968 diff --git a/Phi-4-mini-reasoning-Q4_0.gguf b/Phi-4-mini-reasoning-Q4_0.gguf new file mode 100644 index 0000000..391721e --- /dev/null +++ b/Phi-4-mini-reasoning-Q4_0.gguf @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:394a8b52b77b622e1771a4d3ed5536d813cf486d107c35b892361c18049829a3 +size 2325150976 diff --git a/Phi-4-mini-reasoning-Q4_K_M.gguf b/Phi-4-mini-reasoning-Q4_K_M.gguf new file mode 100644 index 0000000..23729e4 --- /dev/null +++ b/Phi-4-mini-reasoning-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba07818d4c81a29dc755825c7eb5fc025c2b287487d59063a16260197ea7694a +size 2491874560 diff --git a/Phi-4-mini-reasoning-Q4_K_S.gguf b/Phi-4-mini-reasoning-Q4_K_S.gguf new file mode 100644 index 0000000..9452b3e --- /dev/null +++ b/Phi-4-mini-reasoning-Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37c00c3b3e4a82b70e449883a07a0c5966950a9cd121a55b5cee6ce08ec3c8b3 +size 2337733888 diff --git a/Phi-4-mini-reasoning-Q5_0.gguf b/Phi-4-mini-reasoning-Q5_0.gguf new file mode 100644 index 0000000..783c4ae --- /dev/null +++ b/Phi-4-mini-reasoning-Q5_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f3a2e3dd0b4fd837f9cd57430be341b17b64525cff557b155fd0d9b5d21748 +size 2727804160 diff --git a/Phi-4-mini-reasoning-Q5_K_M.gguf b/Phi-4-mini-reasoning-Q5_K_M.gguf new file mode 100644 index 0000000..52c043e --- /dev/null +++ b/Phi-4-mini-reasoning-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1095c4697ca34cd105de2fcfdc7c4acde3020d7409255091111b7b3407f267c1 +size 2848128256 diff --git a/Phi-4-mini-reasoning-Q5_K_S.gguf b/Phi-4-mini-reasoning-Q5_K_S.gguf new file mode 100644 index 0000000..56172a5 --- /dev/null +++ b/Phi-4-mini-reasoning-Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9405cabb92a2647d2f089b34286361dae709a995b6d18f3bc328b72ad0967b0e +size 2727804160 diff --git a/Phi-4-mini-reasoning-Q6_K.gguf b/Phi-4-mini-reasoning-Q6_K.gguf new file mode 100644 index 0000000..a688771 --- /dev/null +++ b/Phi-4-mini-reasoning-Q6_K.gguf @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9ff4fba6515dd1a77a8d45f4f33e598a664c8fa7f57725ea9f194a23f440442d +size 3155623168 diff --git a/Phi-4-mini-reasoning-Q8_0.gguf b/Phi-4-mini-reasoning-Q8_0.gguf new file mode 100644 index 0000000..8e207d9 --- /dev/null +++ b/Phi-4-mini-reasoning-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d709abf9ba3ab5cab68aefc52688e2cd373866afacea22bcde8883a5b760acb7 +size 4084611328 diff --git a/Phi-4-mini-reasoning-f16.gguf b/Phi-4-mini-reasoning-f16.gguf new file mode 100644 index 0000000..558eb99 --- /dev/null +++ b/Phi-4-mini-reasoning-f16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600ab28baffdc1915c00da6ffb238519f32501766c3b219047d9f6cb67f0f83d +size 7680694528 diff --git a/README.md b/README.md new file mode 100644 index 0000000..5e754aa --- /dev/null +++ b/README.md @@ -0,0 +1,82 @@ +--- +license: mit +license_link: https://huggingface.co/microsoft/Phi-4-mini-reasoning/resolve/main/LICENSE +pipeline_tag: text-generation +library_name: transformers +model_creator: Microsoft +model_name: Phi-4-mini-reasoning +quantized_by: Second State Inc. +language: +- en +--- + + + +
+ +
+
+ + +# Phi-4-mini-reasoning-GGUF + +## Original Model + +[microsoft/Phi-4-mini-reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) + +## Run with LlamaEdge + +- LlamaEdge version: coming soon + + + +- Prompt template + + - Prompt type: `phi-4-chat` + + - Prompt string + + ```text + <|system|>Insert System Message<|end|><|user|>Insert User Message<|end|><|assistant|> + ``` + +- Context size: `128000` + +- Run as LlamaEdge service + + ```bash + wasmedge --dir .:. --nn-preload default:GGML:AUTO:Phi-4-mini-reasoning-Q5_K_M.gguf \ + llama-api-server.wasm \ + --prompt-template phi-4-chat \ + --ctx-size 128000 \ + --model-name Phi-4-mini-reasoning + ``` + +- Run as LlamaEdge command app + + ```bash + wasmedge --dir .:. --nn-preload default:GGML:AUTO:Phi-4-mini-reasoning-Q5_K_M.gguf \ + llama-chat.wasm \ + --prompt-template phi-4-chat \ + --ctx-size 128000 + ``` + +## Quantized GGUF Models + +| Name | Quant method | Bits | Size | Use case | +| ---- | ---- | ---- | ---- | ----- | +| [Phi-4-mini-reasoning-Q2_K.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q2_K.gguf) | Q2_K | 2 | 1.68 GB| smallest, significant quality loss - not recommended for most purposes | +| [Phi-4-mini-reasoning-Q3_K_L.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q3_K_L.gguf) | Q3_K_L | 3 | 2.25 GB| small, substantial quality loss | +| [Phi-4-mini-reasoning-Q3_K_M.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q3_K_M.gguf) | Q3_K_M | 3 | 2.12 GB| very small, high quality loss | +| [Phi-4-mini-reasoning-Q3_K_S.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q3_K_S.gguf) | Q3_K_S | 3 | 1.90 GB| very small, high quality loss | +| [Phi-4-mini-reasoning-Q4_0.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q4_0.gguf) | Q4_0 | 4 | 2.33 GB| 
legacy; small, very high quality loss - prefer using Q3_K_M | +| [Phi-4-mini-reasoning-Q4_K_M.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q4_K_M.gguf) | Q4_K_M | 4 | 2.49 GB| medium, balanced quality - recommended | +| [Phi-4-mini-reasoning-Q4_K_S.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q4_K_S.gguf) | Q4_K_S | 4 | 2.34 GB| small, greater quality loss | +| [Phi-4-mini-reasoning-Q5_0.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q5_0.gguf) | Q5_0 | 5 | 2.73 GB| legacy; medium, balanced quality - prefer using Q4_K_M | +| [Phi-4-mini-reasoning-Q5_K_M.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q5_K_M.gguf) | Q5_K_M | 5 | 2.85 GB| large, very low quality loss - recommended | +| [Phi-4-mini-reasoning-Q5_K_S.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q5_K_S.gguf) | Q5_K_S | 5 | 2.73 GB| large, low quality loss - recommended | +| [Phi-4-mini-reasoning-Q6_K.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q6_K.gguf) | Q6_K | 6 | 3.16 GB| very large, extremely low quality loss | +| [Phi-4-mini-reasoning-Q8_0.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-Q8_0.gguf) | Q8_0 | 8 | 4.08 GB| very large, extremely low quality loss - not recommended | +| [Phi-4-mini-reasoning-f16.gguf](https://huggingface.co/second-state/Phi-4-mini-reasoning-GGUF/blob/main/Phi-4-mini-reasoning-f16.gguf) | f16 | 16 | 7.68 GB| | + +*Quantized with llama.cpp b5201.* diff --git a/config.json b/config.json new file mode 100644 index 0000000..4bcc7ea --- /dev/null +++ b/config.json @@ -0,0 +1,138 @@ +{ + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 199999, + 
"embd_pdrop": 0.0, + "eos_token_id": 199999, + "full_attn_mod": 1, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "interpolate_factor": 1, + "lm_head_bias": false, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "phi3", + "num_attention_heads": 24, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "original_max_position_embeddings": 4096, + "pad_token_id": 199999, + "partial_rotary_factor": 0.75, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "long_factor": [ + 1, + 1.118320672, + 1.250641126, + 1.398617824, + 1.564103225, + 1.74916897, + 1.956131817, + 2.187582649, + 2.446418898, + 2.735880826, + 3.059592084, + 3.421605075, + 3.826451687, + 4.279200023, + 4.785517845, + 5.351743533, + 5.984965424, + 6.693110555, + 7.485043894, + 8.370679318, + 9.36110372, + 10.4687158, + 11.70738129, + 13.09260651, + 14.64173252, + 16.37415215, + 18.31155283, + 20.47818807, + 22.90118105, + 25.61086418, + 28.64115884, + 32.03, + 32.1, + 32.13, + 32.23, + 32.6, + 32.61, + 32.64, + 32.66, + 32.7, + 32.71, + 32.93, + 32.97, + 33.28, + 33.49, + 33.5, + 44.16, + 47.77 + ], + "short_factor": [ + 1, + 1.118320672, + 1.250641126, + 1.398617824, + 1.564103225, + 1.74916897, + 1.956131817, + 2.187582649, + 2.446418898, + 2.735880826, + 3.059592084, + 3.421605075, + 3.826451687, + 4.279200023, + 4.785517845, + 5.351743533, + 5.984965424, + 6.693110555, + 7.485043894, + 8.370679318, + 9.36110372, + 10.4687158, + 11.70738129, + 13.09260651, + 14.64173252, + 16.37415215, + 18.31155283, + 20.47818807, + 22.90118105, + 25.61086418, + 28.64115884, + 32.03, + 32.1, + 32.13, + 32.23, + 32.6, + 32.61, + 32.64, + 32.66, + 32.7, + 32.71, + 32.93, + 32.97, + 33.28, + 33.49, + 33.5, + 44.16, + 47.77 + ], + "type": "longrope" + }, + "rope_theta": 10000.0, + "sliding_window": 262144, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.50.0", + "use_cache": 
true, + "vocab_size": 200064 +}