From 462b10aad035d892e0a6517db1a88d40a0adb34b Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 8 May 2026 17:59:28 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: SimplySara/Kai-3B-Instruct-i1-GGUF Source: Original Platform --- .gitattributes | 64 +++++++++++ Kai-3B-Instruct-detailed-bench.csv | 49 +++++++++ Kai-3B-Instruct-i1-IQ1_M.gguf | 3 + Kai-3B-Instruct-i1-IQ1_S.gguf | 3 + Kai-3B-Instruct-i1-IQ2_M.gguf | 3 + Kai-3B-Instruct-i1-IQ2_S.gguf | 3 + Kai-3B-Instruct-i1-IQ2_XS.gguf | 3 + Kai-3B-Instruct-i1-IQ2_XXS.gguf | 3 + Kai-3B-Instruct-i1-IQ3_M.gguf | 3 + Kai-3B-Instruct-i1-IQ3_S.gguf | 3 + Kai-3B-Instruct-i1-IQ3_XS.gguf | 3 + Kai-3B-Instruct-i1-IQ3_XXS.gguf | 3 + Kai-3B-Instruct-i1-IQ4_NL.gguf | 3 + Kai-3B-Instruct-i1-IQ4_XS.gguf | 3 + Kai-3B-Instruct-i1-MXFP4_MOE.gguf | 3 + Kai-3B-Instruct-i1-Q2_K.gguf | 3 + Kai-3B-Instruct-i1-Q2_K_S.gguf | 3 + Kai-3B-Instruct-i1-Q3_K_L.gguf | 3 + Kai-3B-Instruct-i1-Q3_K_M.gguf | 3 + Kai-3B-Instruct-i1-Q3_K_S.gguf | 3 + Kai-3B-Instruct-i1-Q4_0.gguf | 3 + Kai-3B-Instruct-i1-Q4_1.gguf | 3 + Kai-3B-Instruct-i1-Q4_K_M.gguf | 3 + Kai-3B-Instruct-i1-Q4_K_S.gguf | 3 + Kai-3B-Instruct-i1-Q5_0.gguf | 3 + Kai-3B-Instruct-i1-Q5_1.gguf | 3 + Kai-3B-Instruct-i1-Q5_K_M.gguf | 3 + Kai-3B-Instruct-i1-Q5_K_S.gguf | 3 + Kai-3B-Instruct-i1-Q6_K.gguf | 3 + Kai-3B-Instruct-i1-Q8_0.gguf | 3 + Kai-3B-Instruct.imatrix.gguf | 3 + README.md | 168 +++++++++++++++++++++++++++++ 32 files changed, 368 insertions(+) create mode 100644 .gitattributes create mode 100644 Kai-3B-Instruct-detailed-bench.csv create mode 100644 Kai-3B-Instruct-i1-IQ1_M.gguf create mode 100644 Kai-3B-Instruct-i1-IQ1_S.gguf create mode 100644 Kai-3B-Instruct-i1-IQ2_M.gguf create mode 100644 Kai-3B-Instruct-i1-IQ2_S.gguf create mode 100644 
Kai-3B-Instruct-i1-IQ2_XS.gguf create mode 100644 Kai-3B-Instruct-i1-IQ2_XXS.gguf create mode 100644 Kai-3B-Instruct-i1-IQ3_M.gguf create mode 100644 Kai-3B-Instruct-i1-IQ3_S.gguf create mode 100644 Kai-3B-Instruct-i1-IQ3_XS.gguf create mode 100644 Kai-3B-Instruct-i1-IQ3_XXS.gguf create mode 100644 Kai-3B-Instruct-i1-IQ4_NL.gguf create mode 100644 Kai-3B-Instruct-i1-IQ4_XS.gguf create mode 100644 Kai-3B-Instruct-i1-MXFP4_MOE.gguf create mode 100644 Kai-3B-Instruct-i1-Q2_K.gguf create mode 100644 Kai-3B-Instruct-i1-Q2_K_S.gguf create mode 100644 Kai-3B-Instruct-i1-Q3_K_L.gguf create mode 100644 Kai-3B-Instruct-i1-Q3_K_M.gguf create mode 100644 Kai-3B-Instruct-i1-Q3_K_S.gguf create mode 100644 Kai-3B-Instruct-i1-Q4_0.gguf create mode 100644 Kai-3B-Instruct-i1-Q4_1.gguf create mode 100644 Kai-3B-Instruct-i1-Q4_K_M.gguf create mode 100644 Kai-3B-Instruct-i1-Q4_K_S.gguf create mode 100644 Kai-3B-Instruct-i1-Q5_0.gguf create mode 100644 Kai-3B-Instruct-i1-Q5_1.gguf create mode 100644 Kai-3B-Instruct-i1-Q5_K_M.gguf create mode 100644 Kai-3B-Instruct-i1-Q5_K_S.gguf create mode 100644 Kai-3B-Instruct-i1-Q6_K.gguf create mode 100644 Kai-3B-Instruct-i1-Q8_0.gguf create mode 100644 Kai-3B-Instruct.imatrix.gguf create mode 100644 README.md diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e15261e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,64 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs 
-text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ2_S.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ2_XS.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-MXFP4_MOE.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q2_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q3_K_L.gguf 
filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q5_1.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct-i1-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +Kai-3B-Instruct.imatrix.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Kai-3B-Instruct-detailed-bench.csv b/Kai-3B-Instruct-detailed-bench.csv new file mode 100644 index 0000000..18eef18 --- /dev/null +++ b/Kai-3B-Instruct-detailed-bench.csv @@ -0,0 +1,49 @@ +Model,Size_GB,BPW,PPL_Q,KLD_Mean,KLD_Max,Top_P_Match +Kai-3B-Instruct-BF16.gguf,5.735,16.02,13.071019,-0.000011,0.000004,100.000% +Kai-3B-Instruct-IQ3_M.gguf,1.368,3.82,15.668992,0.229215,6.596559,79.735% +Kai-3B-Instruct-IQ3_S.gguf,1.339,3.74,22.508640,0.641822,13.271622,67.115% +Kai-3B-Instruct-IQ4_NL.gguf,1.697,4.74,13.689333,0.045861,3.379928,90.177% +Kai-3B-Instruct-IQ4_XS.gguf,1.619,4.52,13.636783,0.046371,3.109986,89.944% +Kai-3B-Instruct-MXFP4_MOE.gguf,3.051,8.52,13.071337,0.001851,0.099089,97.407% +Kai-3B-Instruct-Q2_K.gguf,1.167,3.26,19.239565,0.445062,13.633359,72.289% +Kai-3B-Instruct-Q3_K_L.gguf,1.574,4.4,14.266756,0.092459,8.322849,86.395% +Kai-3B-Instruct-Q3_K_M.gguf,1.463,4.09,14.354851,0.107656,9.363343,85.335% +Kai-3B-Instruct-Q3_K_S.gguf,1.334,3.73,15.492130,0.201516,9.796855,80.455% +Kai-3B-Instruct-Q4_0.gguf,1.682,4.7,14.086875,0.078766,7.626556,87.584% 
+Kai-3B-Instruct-Q4_1.gguf,1.845,5.16,13.721412,0.069878,4.738765,88.105% +Kai-3B-Instruct-Q4_K_M.gguf,1.784,4.98,13.529246,0.036909,4.488238,91.090% +Kai-3B-Instruct-Q4_K_S.gguf,1.693,4.73,13.660392,0.047615,2.440885,89.987% +Kai-3B-Instruct-Q5_0.gguf,2.009,5.61,13.430972,0.024144,1.595060,92.381% +Kai-3B-Instruct-Q5_1.gguf,2.173,6.07,13.301717,0.022064,1.491981,92.723% +Kai-3B-Instruct-Q5_K_M.gguf,2.062,5.76,13.284102,0.015320,1.171668,93.752% +Kai-3B-Instruct-Q5_K_S.gguf,2.009,5.61,13.373599,0.020303,2.434403,93.103% +Kai-3B-Instruct-Q6_K.gguf,2.357,6.58,13.139961,0.008945,1.172464,95.024% +Kai-3B-Instruct-Q8_0.gguf,3.051,8.52,13.071337,0.001851,0.099089,97.407% +Kai-3B-Instruct-i1-IQ1_M.gguf,0.776,2.17,58.209109,1.653420,16.925764,49.496% +Kai-3B-Instruct-i1-IQ1_S.gguf,0.72,2.01,128.277281,2.531440,21.927551,39.075% +Kai-3B-Instruct-i1-IQ2_M.gguf,1.048,2.93,17.573076,0.359494,12.123366,74.706% +Kai-3B-Instruct-i1-IQ2_S.gguf,0.974,2.72,19.779741,0.493906,11.262773,71.038% +Kai-3B-Instruct-i1-IQ2_XS.gguf,0.946,2.64,21.411029,0.579169,13.967813,68.776% +Kai-3B-Instruct-i1-IQ2_XXS.gguf,0.868,2.42,28.578416,0.878993,15.491375,62.348% +Kai-3B-Instruct-i1-IQ3_M.gguf,1.368,3.82,13.898071,0.105325,5.612194,85.765% +Kai-3B-Instruct-i1-IQ3_S.gguf,1.339,3.74,14.056398,0.113459,9.671529,85.283% +Kai-3B-Instruct-i1-IQ3_XS.gguf,1.277,3.57,14.349473,0.142024,7.847116,83.815% +Kai-3B-Instruct-i1-IQ3_XXS.gguf,1.181,3.3,15.252164,0.206685,13.769287,80.107% +Kai-3B-Instruct-i1-IQ4_NL.gguf,1.686,4.71,13.614878,0.040318,6.215707,90.568% +Kai-3B-Instruct-i1-IQ4_XS.gguf,1.605,4.48,13.621778,0.041313,4.147662,90.477% +Kai-3B-Instruct-i1-MXFP4_MOE.gguf,3.051,8.52,13.071337,0.001851,0.099089,97.407% +Kai-3B-Instruct-i1-Q2_K.gguf,1.167,3.26,18.283702,0.352258,16.202076,75.310% +Kai-3B-Instruct-i1-Q2_K_S.gguf,1.096,3.06,20.172636,0.460490,13.532367,72.285% +Kai-3B-Instruct-i1-Q3_K_L.gguf,1.574,4.4,14.190292,0.081455,5.245802,87.164% 
+Kai-3B-Instruct-i1-Q3_K_M.gguf,1.463,4.09,14.371849,0.093973,4.376335,86.425% +Kai-3B-Instruct-i1-Q3_K_S.gguf,1.334,3.73,15.463698,0.188454,8.970517,81.016% +Kai-3B-Instruct-i1-Q4_0.gguf,1.687,4.71,13.691418,0.053883,4.177481,89.222% +Kai-3B-Instruct-i1-Q4_1.gguf,1.845,5.16,13.586296,0.038176,2.971467,90.941% +Kai-3B-Instruct-i1-Q4_K_M.gguf,1.784,4.98,13.544169,0.033345,4.224728,91.440% +Kai-3B-Instruct-i1-Q4_K_S.gguf,1.693,4.73,13.613850,0.038684,2.172965,90.973% +Kai-3B-Instruct-i1-Q5_0.gguf,2.014,5.63,13.340498,0.017448,1.049807,93.467% +Kai-3B-Instruct-i1-Q5_1.gguf,2.173,6.07,13.262362,0.014588,1.789870,93.939% +Kai-3B-Instruct-i1-Q5_K_M.gguf,2.062,5.76,13.278514,0.013944,2.965405,94.039% +Kai-3B-Instruct-i1-Q5_K_S.gguf,2.009,5.61,13.253110,0.015325,3.441511,93.683% +Kai-3B-Instruct-i1-Q6_K.gguf,2.357,6.58,13.171252,0.008568,1.565256,95.021% +Kai-3B-Instruct-i1-Q8_0.gguf,3.051,8.52,13.071337,0.001851,0.099089,97.407% diff --git a/Kai-3B-Instruct-i1-IQ1_M.gguf b/Kai-3B-Instruct-i1-IQ1_M.gguf new file mode 100644 index 0000000..17b1e79 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ1_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a697d2e2be0535278d2a62d9071e3ff9b567a2402ce6fdefe178529bb8e119 +size 832950208 diff --git a/Kai-3B-Instruct-i1-IQ1_S.gguf b/Kai-3B-Instruct-i1-IQ1_S.gguf new file mode 100644 index 0000000..3b75373 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ1_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1404ddd459037511ed2d56c9f0f56c7b2e4c4245882d57063638f8e693825d3e +size 773574592 diff --git a/Kai-3B-Instruct-i1-IQ2_M.gguf b/Kai-3B-Instruct-i1-IQ2_M.gguf new file mode 100644 index 0000000..b5301a4 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ2_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da812e6f3115c5c523242aa0ebe2907d7f23dbd3452e84c4c7a5719168e79143 +size 1125355456 diff --git a/Kai-3B-Instruct-i1-IQ2_S.gguf b/Kai-3B-Instruct-i1-IQ2_S.gguf new file mode 100644 
index 0000000..ce6861b --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ2_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab5bc37f6c01ebfb6cb9cad0d4744da07ec6c5f26216a2ff89a4b07f76bd2a8 +size 1046187968 diff --git a/Kai-3B-Instruct-i1-IQ2_XS.gguf b/Kai-3B-Instruct-i1-IQ2_XS.gguf new file mode 100644 index 0000000..ceb1d07 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ2_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4120d7343961be10ef41c51dd37497ad1f99b421c27de0e08e4f275ec18aa42 +size 1015795648 diff --git a/Kai-3B-Instruct-i1-IQ2_XXS.gguf b/Kai-3B-Instruct-i1-IQ2_XXS.gguf new file mode 100644 index 0000000..0000477 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ2_XXS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49370a74752c2a6a4af650629d09bfbf0362f71f70dbd4c034599d09f6acfa6c +size 931909568 diff --git a/Kai-3B-Instruct-i1-IQ3_M.gguf b/Kai-3B-Instruct-i1-IQ3_M.gguf new file mode 100644 index 0000000..913de68 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ3_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8991434db66fdec6c9a7b679ca0247f7d81d3efa2983939b9d978d039272490f +size 1469358016 diff --git a/Kai-3B-Instruct-i1-IQ3_S.gguf b/Kai-3B-Instruct-i1-IQ3_S.gguf new file mode 100644 index 0000000..29c8573 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ3_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0ba36baee9a6fa8bd6ff5d9f02447ee66e268bdc9ba0e962d11a3cc02d0273 +size 1437327296 diff --git a/Kai-3B-Instruct-i1-IQ3_XS.gguf b/Kai-3B-Instruct-i1-IQ3_XS.gguf new file mode 100644 index 0000000..b38dde4 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ3_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0541bbd4f468b554b21df00bc2783d589f8520d043c13d52423f3bdd39bad384 +size 1371414464 diff --git a/Kai-3B-Instruct-i1-IQ3_XXS.gguf b/Kai-3B-Instruct-i1-IQ3_XXS.gguf new file mode 100644 index 0000000..52c63e6 --- /dev/null +++ 
b/Kai-3B-Instruct-i1-IQ3_XXS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6de8c880307bce932162bf7439ce1817494bc7f5a7144d83259d68242a5a8f88 +size 1267666880 diff --git a/Kai-3B-Instruct-i1-IQ4_NL.gguf b/Kai-3B-Instruct-i1-IQ4_NL.gguf new file mode 100644 index 0000000..35907a8 --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ4_NL.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f9c2fe1a5d2d075abb67575234920d378c09cd5003c374c244a0d694bc4138 +size 1810538432 diff --git a/Kai-3B-Instruct-i1-IQ4_XS.gguf b/Kai-3B-Instruct-i1-IQ4_XS.gguf new file mode 100644 index 0000000..280b49c --- /dev/null +++ b/Kai-3B-Instruct-i1-IQ4_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14605ae2bb42667e7ed513d2c2e569ce9a5511789a297e8f4f1dd541eb92c387 +size 1723834304 diff --git a/Kai-3B-Instruct-i1-MXFP4_MOE.gguf b/Kai-3B-Instruct-i1-MXFP4_MOE.gguf new file mode 100644 index 0000000..e0591d5 --- /dev/null +++ b/Kai-3B-Instruct-i1-MXFP4_MOE.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea645bb457fdfe8986497638259b08f4a6845744f5f67cf6cfcebe29cd688007 +size 3275575232 diff --git a/Kai-3B-Instruct-i1-Q2_K.gguf b/Kai-3B-Instruct-i1-Q2_K.gguf new file mode 100644 index 0000000..71f2fe1 --- /dev/null +++ b/Kai-3B-Instruct-i1-Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c222eab126b56da97f1086db67d9111c584633c8bce2f4c46dc91fa7f44f9ebd +size 1253302208 diff --git a/Kai-3B-Instruct-i1-Q2_K_S.gguf b/Kai-3B-Instruct-i1-Q2_K_S.gguf new file mode 100644 index 0000000..79ce42a --- /dev/null +++ b/Kai-3B-Instruct-i1-Q2_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:953d5e551f86e32dc811e574970c18b4a2c625f52f9a586293a169b9b043d6df +size 1176674240 diff --git a/Kai-3B-Instruct-i1-Q3_K_L.gguf b/Kai-3B-Instruct-i1-Q3_K_L.gguf new file mode 100644 index 0000000..b9edc1f --- /dev/null +++ b/Kai-3B-Instruct-i1-Q3_K_L.gguf @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b67b57d42ada530a8d30b1d0383ca516b8f7250eceadd4fc63b920550fe510 +size 1690214336 diff --git a/Kai-3B-Instruct-i1-Q3_K_M.gguf b/Kai-3B-Instruct-i1-Q3_K_M.gguf new file mode 100644 index 0000000..da540a7 --- /dev/null +++ b/Kai-3B-Instruct-i1-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462862d69d0973022ec9f1e1c0114e7d927d1ff0ae04869e85d6d02aafd14c80 +size 1571069888 diff --git a/Kai-3B-Instruct-i1-Q3_K_S.gguf b/Kai-3B-Instruct-i1-Q3_K_S.gguf new file mode 100644 index 0000000..39040a3 --- /dev/null +++ b/Kai-3B-Instruct-i1-Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5491024d8175cd8b93e3349337b90b1a6b0e76e1fa400d36fe3d8c7816009a +size 1432313792 diff --git a/Kai-3B-Instruct-i1-Q4_0.gguf b/Kai-3B-Instruct-i1-Q4_0.gguf new file mode 100644 index 0000000..01fdf89 --- /dev/null +++ b/Kai-3B-Instruct-i1-Q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc79ad46bb7c0ce05513e8e73eebe4af6b45270add29abdfc790e680d0ba221 +size 1811455936 diff --git a/Kai-3B-Instruct-i1-Q4_1.gguf b/Kai-3B-Instruct-i1-Q4_1.gguf new file mode 100644 index 0000000..ce24f26 --- /dev/null +++ b/Kai-3B-Instruct-i1-Q4_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:581ef2b01898aaad9a3397b98cf096703f72e599367bd7433dc422eb3a0584f2 +size 1981587392 diff --git a/Kai-3B-Instruct-i1-Q4_K_M.gguf b/Kai-3B-Instruct-i1-Q4_K_M.gguf new file mode 100644 index 0000000..96362c4 --- /dev/null +++ b/Kai-3B-Instruct-i1-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d322420080f1486e9322540562ed6aa30df855a8b9a22b462bc529ef6e7d77a0 +size 1915305920 diff --git a/Kai-3B-Instruct-i1-Q4_K_S.gguf b/Kai-3B-Instruct-i1-Q4_K_S.gguf new file mode 100644 index 0000000..c305d9a --- /dev/null +++ b/Kai-3B-Instruct-i1-Q4_K_S.gguf @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:1260a938e06949abc9dddc9964d7226f1e2be291e49afaa6305d8ef15c89a7ba +size 1817616320 diff --git a/Kai-3B-Instruct-i1-Q5_0.gguf b/Kai-3B-Instruct-i1-Q5_0.gguf new file mode 100644 index 0000000..3a5737a --- /dev/null +++ b/Kai-3B-Instruct-i1-Q5_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeffed253a6e9bcf53e88c697a2129b9f3f9cf99234247adc138798b5c56b5a0 +size 2162991040 diff --git a/Kai-3B-Instruct-i1-Q5_1.gguf b/Kai-3B-Instruct-i1-Q5_1.gguf new file mode 100644 index 0000000..13a5ab1 --- /dev/null +++ b/Kai-3B-Instruct-i1-Q5_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8ac34242e0373f5a472cf782217e31cd8e25109e4326ebe8acdec7e2af0176 +size 2333122496 diff --git a/Kai-3B-Instruct-i1-Q5_K_M.gguf b/Kai-3B-Instruct-i1-Q5_K_M.gguf new file mode 100644 index 0000000..3a782f7 --- /dev/null +++ b/Kai-3B-Instruct-i1-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:636fa6f8a6fa8bfcd18d4e7ea1e12ff73f9fb36c3c6fe2cd90f203d6b79ea677 +size 2213756864 diff --git a/Kai-3B-Instruct-i1-Q5_K_S.gguf b/Kai-3B-Instruct-i1-Q5_K_S.gguf new file mode 100644 index 0000000..0bb491b --- /dev/null +++ b/Kai-3B-Instruct-i1-Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27768d23d642ffd4fe9474d8caf3a2fcb0c1c81f492b9e6862bfca596be510dd +size 2157354944 diff --git a/Kai-3B-Instruct-i1-Q6_K.gguf b/Kai-3B-Instruct-i1-Q6_K.gguf new file mode 100644 index 0000000..33173fe --- /dev/null +++ b/Kai-3B-Instruct-i1-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce03becf0cf4d340bbc0b1cb64262d81d209b4aaccd04c85bc1ab389401ef471 +size 2530860992 diff --git a/Kai-3B-Instruct-i1-Q8_0.gguf b/Kai-3B-Instruct-i1-Q8_0.gguf new file mode 100644 index 0000000..371701d --- /dev/null +++ b/Kai-3B-Instruct-i1-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8e83f5ec43376455ace239211f03c889dfc2b91edade35e05d44b5f9a9f39f11 +size 3275575232 diff --git a/Kai-3B-Instruct.imatrix.gguf b/Kai-3B-Instruct.imatrix.gguf new file mode 100644 index 0000000..21f6a29 --- /dev/null +++ b/Kai-3B-Instruct.imatrix.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abb3ac8c71920905363529494f3b17942fc7816b86bfc61ac98b6a18afad2d5 +size 3393408 diff --git a/README.md b/README.md new file mode 100644 index 0000000..5055427 --- /dev/null +++ b/README.md @@ -0,0 +1,168 @@ +--- +library_name: transformers +license: apache-2.0 +tags: +- math +- reasoning +- text-generation +- ads +- distillation +language: +- en +pipeline_tag: text-generation +base_model: +- NoesisLab/Kai-3B-Instruct +--- +This is an imatrix quantization of [NoesisLab/Kai-3B-Instruct](https://huggingface.co/NoesisLab/Kai-3B-Instruct), made by [SimplySara](https://huggingface.co/SimplySara). + +Note from NoesisLab: "Due to the ADS distillation method, this model is highly sensitive to quantization noise. Q8_0 or Q6_K are strongly recommended for preserving both logical integrity and conversational alignment. Q4 variants may exhibit template collapse." 
+ +| Model | Size_GB | BPW | PPL_Q | KLD_Mean | KLD_Max | Top_P_Match | +|:----------------------------------|----------:|------:|---------:|-----------:|----------:|:--------------| +| Kai-3B-Instruct-BF16.gguf | 5.735 | 16.02 | 12.2614 | -1.2e-05 | 4e-06 | 100.000% | +| Kai-3B-Instruct-MXFP4_MOE.gguf | 3.051 | 8.52 | 12.268 | 0.001919 | 0.161748 | 97.288% | +| Kai-3B-Instruct-i1-MXFP4_MOE.gguf | 3.051 | 8.52 | 12.268 | 0.001919 | 0.161748 | 97.288% | +| Kai-3B-Instruct-Q8_0.gguf | 3.051 | 8.52 | 12.268 | 0.001919 | 0.161748 | 97.288% | +| Kai-3B-Instruct-i1-Q8_0.gguf | 3.051 | 8.52 | 12.268 | 0.001919 | 0.161748 | 97.288% | +| Kai-3B-Instruct-Q6_K.gguf | 2.357 | 6.58 | 12.3055 | 0.009404 | 0.366649 | 94.435% | +| Kai-3B-Instruct-i1-Q6_K.gguf | 2.357 | 6.58 | 12.3486 | 0.008842 | 0.528699 | 94.605% | +| Kai-3B-Instruct-Q5_1.gguf | 2.173 | 6.07 | 12.4607 | 0.022546 | 1.62058 | 92.336% | +| Kai-3B-Instruct-i1-Q5_1.gguf | 2.173 | 6.07 | 12.3913 | 0.015555 | 0.887861 | 93.164% | +| Kai-3B-Instruct-Q5_K_M.gguf | 2.062 | 5.76 | 12.3932 | 0.015953 | 2.06684 | 93.315% | +| Kai-3B-Instruct-i1-Q5_K_M.gguf | 2.062 | 5.76 | 12.3974 | 0.014712 | 1.21054 | 93.344% | +| Kai-3B-Instruct-i1-Q5_0.gguf | 2.014 | 5.63 | 12.3845 | 0.018582 | 1.7811 | 92.676% | +| Kai-3B-Instruct-Q5_K_S.gguf | 2.009 | 5.61 | 12.4705 | 0.021112 | 2.25188 | 92.477% | +| Kai-3B-Instruct-i1-Q5_K_S.gguf | 2.009 | 5.61 | 12.422 | 0.016098 | 1.02742 | 93.198% | +| Kai-3B-Instruct-Q5_0.gguf | 2.009 | 5.61 | 12.5354 | 0.024549 | 2.64757 | 91.846% | +| Kai-3B-Instruct-i1-Q4_1.gguf | 1.845 | 5.16 | 12.6693 | 0.039282 | 2.17269 | 90.104% | +| Kai-3B-Instruct-Q4_1.gguf | 1.845 | 5.16 | 12.8411 | 0.070893 | 9.75963 | 87.274% | +| Kai-3B-Instruct-i1-Q4_K_M.gguf | 1.784 | 4.98 | 12.562 | 0.033791 | 2.37929 | 90.693% | +| Kai-3B-Instruct-Q4_K_M.gguf | 1.784 | 4.98 | 12.5551 | 0.039329 | 8.08951 | 90.011% | +| Kai-3B-Instruct-IQ4_NL.gguf | 1.697 | 4.74 | 12.6349 | 0.04746 | 3.75837 | 89.164% | +| 
Kai-3B-Instruct-Q4_K_S.gguf | 1.693 | 4.73 | 12.6881 | 0.050317 | 7.15421 | 88.889% | +| Kai-3B-Instruct-i1-Q4_K_S.gguf | 1.693 | 4.73 | 12.672 | 0.038976 | 2.35062 | 90.141% | +| Kai-3B-Instruct-i1-Q4_0.gguf | 1.687 | 4.71 | 12.9318 | 0.056914 | 4.90942 | 88.242% | +| Kai-3B-Instruct-i1-IQ4_NL.gguf | 1.686 | 4.71 | 12.7029 | 0.041041 | 2.82814 | 89.995% | +| Kai-3B-Instruct-Q4_0.gguf | 1.682 | 4.7 | 13.1831 | 0.079359 | 5.30813 | 86.546% | +| Kai-3B-Instruct-IQ4_XS.gguf | 1.619 | 4.52 | 12.6642 | 0.048527 | 3.11693 | 89.010% | +| Kai-3B-Instruct-i1-IQ4_XS.gguf | 1.605 | 4.48 | 12.7351 | 0.042119 | 2.81661 | 89.976% | +| Kai-3B-Instruct-Q3_K_L.gguf | 1.574 | 4.4 | 13.2229 | 0.095355 | 8.63835 | 85.518% | +| Kai-3B-Instruct-i1-Q3_K_L.gguf | 1.574 | 4.4 | 13.2477 | 0.084668 | 5.71143 | 86.163% | +| Kai-3B-Instruct-Q3_K_M.gguf | 1.463 | 4.09 | 13.3455 | 0.112669 | 9.19842 | 84.135% | +| Kai-3B-Instruct-i1-Q3_K_M.gguf | 1.463 | 4.09 | 13.4095 | 0.095939 | 7.93677 | 85.368% | +| Kai-3B-Instruct-i1-IQ3_M.gguf | 1.368 | 3.82 | 13.1481 | 0.112437 | 6.45799 | 84.307% |
+| Kai-3B-Instruct-IQ3_M.gguf | 1.368 | 3.82 | 14.5693 | 0.246713 | 7.29781 | 77.711% | +| Kai-3B-Instruct-IQ3_S.gguf | 1.339 | 3.74 | 20.2851 | 0.623557 | 14.9444 | 66.169% | +| Kai-3B-Instruct-i1-IQ3_S.gguf | 1.339 | 3.74 | 13.2823 | 0.120975 | 6.12451 | 83.724% | +| Kai-3B-Instruct-i1-Q3_K_S.gguf | 1.334 | 3.73 | 14.4279 | 0.196396 | 11.9249 | 79.536% | +| Kai-3B-Instruct-Q3_K_S.gguf | 1.334 | 3.73 | 14.5753 | 0.20947 | 10.2762 | 79.235% | +| Kai-3B-Instruct-i1-IQ3_XS.gguf | 1.277 | 3.57 | 13.5713 | 0.149838 | 5.19091 | 81.978% | +| Kai-3B-Instruct-i1-IQ3_XXS.gguf | 1.181 | 3.3 | 14.4968 | 0.218333 | 7.41132 | 78.317% | +| Kai-3B-Instruct-i1-Q2_K.gguf | 1.167 | 3.26 | 17.0515 | 0.362859 | 13.7054 | 73.511% | +| Kai-3B-Instruct-Q2_K.gguf | 1.167 | 3.26 | 18.421 | 0.471699 | 10.9955 | 70.276% | +| Kai-3B-Instruct-i1-Q2_K_S.gguf | 1.096 | 3.06 | 19.0203 | 0.47105 | 9.39981 | 70.322% | +| Kai-3B-Instruct-i1-IQ2_M.gguf | 1.048 | 2.93 | 16.8179 | 0.377914 | 8.06048 | 72.505% | +| Kai-3B-Instruct-i1-IQ2_S.gguf | 0.974 | 2.72 | 18.9657 | 0.507571 | 10.146 | 68.855% | +| Kai-3B-Instruct-i1-IQ2_XS.gguf | 0.946 | 2.64 | 20.7434 | 0.60263 | 12.2848 | 66.248% | +| Kai-3B-Instruct-i1-IQ2_XXS.gguf | 0.868 | 2.42 | 28.0716 | 0.912772 | 20.8551 | 59.005% | +| Kai-3B-Instruct-i1-IQ1_M.gguf | 0.776 | 2.17 | 56.0938 | 1.71797 | 16.7686 | 46.262% | +| Kai-3B-Instruct-i1-IQ1_S.gguf | 0.72 | 2.01 | 142.119 | 2.71244 | 23.1949 | 35.970% | + + +--- +# Kai-3B-Instruct + +A 3B-parameter instruction-tuned language model optimized for reasoning, math, and code generation tasks, powered by our new **ADS (Adaptive Dual-Search Distillation)** technique. 
+## Model Details + +| | | +|---|---| +| **Model** | Kai-3B-Instruct | +| **Architecture** | SmolLM3ForCausalLM | +| **Parameters** | 3B | +| **Hidden size** | 2048 | +| **Intermediate size** | 11008 | +| **Layers** | 36 | +| **Attention heads** | 16 (4 KV heads, GQA) | +| **Context length** | 65536 | +| **Precision** | bfloat16 | +| **Vocab size** | 128,256 | + +## What is ADS? + +**Adaptive Dual-Search Distillation** treats model fine-tuning as a constrained optimization problem inspired by Operations Research. The core mechanism is a dynamic loss function with a stateful dual penalty factor that adapts based on embedding space entropy — forcing the model to converge to high-confidence predictions at difficult reasoning points, without modifying the model architecture. + +## Benchmark Results + +![Performance Comparison Across General, Code, and Math Benchmarks](model_comparison.png) + +### General (5-shot, log-likelihood) + +| Model | Params | MMLU | ARC-c (acc_norm) | HellaSwag (acc_norm) | PIQA (acc_norm) | +|---|:---:|:---:|:---:|:---:|:---:| +| TinyLlama | 1.1B | ~26.0% | ~33.0% | ~60.0% | ~71.0% | +| SmolLM2 | 1.7B | ~35.0% | ~38.0% | ~65.0% | ~74.0% | +| Llama-2-7B | 7B | 45.3% | 46.2% | 77.2% | 79.8% | +| Gemma-2-2B | 2.6B | ~52.0% | ~53.0% | 75.0% | ~78.0% | +| **Kai-3B-Instruct** | **3B** | **53.62%** | **51.88%** | **69.53%** | **77.53%** | +| Qwen2.5-3B | 3B | ~63.0% | ~55.0% | ~73.0% | ~80.0% | + +## Code Generation — HumanEval (Pass@1, 0-shot) + +| Model | Params | HumanEval (Pass@1) | Notes | +|---|:---:|:---:|---| +| Llama-2-7B | 7B | ~12.8% | 3x overtake — smaller model, far better code | +| SmolLM2-1.7B | 1.7B | ~25.0% | ADS delivers +14pp pure gain | +| Gemma-2-2B | 2.6B | ~30.0% | Surpasses Google's heavily distilled 2B flagship | +| **Kai-3B-Instruct** | **3B** | **39.02%** | **ADS topological pruning, full pipeline** | +| GPT-3.5 (Legacy) | 175B | ~48.0% | Kai-3B trails the original GPT-3.5 by only ~9pp | + +## Math — GSM8K (0-shot) + +| 
Model | Params | GSM8K (exact_match) | +|---|:---:|:---:| +| **Kai-3B-Instruct** | **3B** | **39.27%** | + +### Key Observations + +1. **Surpasses Llama-2-7B**: Kai-3B outperforms Llama-2-7B on MMLU (+8.3pp) and ARC-Challenge (+5.7pp) with less than half the parameters — a 7B model decisively beaten by a 3B distilled model. + +2. **Competitive with Gemma-2-2B**: Exceeds Google's Gemma-2-2B on MMLU (+1.6pp) and comes within ~0.5pp of it on PIQA, despite Gemma being trained with significantly more compute. + +3. **HellaSwag**: At **69.53%**, Kai-3B surpasses all sub-2B models by a wide margin and trails the compute-heavy Qwen2.5-3B by only ~3.5pp. + +4. **PIQA**: At **77.53%**, Kai-3B nearly matches Gemma-2-2B (~78.0%) and approaches the 3B-class ceiling set by Qwen2.5-3B (~80.0%). + +## Usage + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch + +model = AutoModelForCausalLM.from_pretrained( + "NoesisLab/Kai-3B-Instruct", + torch_dtype=torch.bfloat16, +) +tokenizer = AutoTokenizer.from_pretrained("NoesisLab/Kai-3B-Instruct") + +messages = [{"role": "user", "content": "What is 25 * 4?"}] +input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt") +output = model.generate(input_ids, max_new_tokens=256) +print(tokenizer.decode(output[0], skip_special_tokens=True)) +``` + +## Citation + +```bibtex +@misc{noesislab2026kai3b, + title={Kai-3B-Instruct}, + author={NoesisLab}, + year={2026}, + url={https://huggingface.co/NoesisLab/Kai-3B-Instruct} +} +``` + +## License + +Apache 2.0 \ No newline at end of file