From 3a5ea7b4ef91239da2397f871488c0de7e43c5a4 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 12 May 2026 05:36:38 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF Source: Original Platform --- .gitattributes | 59 +++++++++++++ GigaChat-20B-A3B-instruct-bf16.i1-IQ1_M.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-IQ1_S.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-IQ2_M.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-IQ2_S.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XS.gguf | 3 + ...Chat-20B-A3B-instruct-bf16.i1-IQ2_XXS.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-IQ3_M.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-IQ3_S.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XS.gguf | 3 + ...Chat-20B-A3B-instruct-bf16.i1-IQ3_XXS.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-IQ4_XS.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q2_K.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q2_K_S.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_L.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_M.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_S.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q4_0.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q4_1.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_M.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_S.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_M.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_S.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.i1-Q6_K.gguf | 3 + GigaChat-20B-A3B-instruct-bf16.imatrix.gguf | 3 + README.md | 85 +++++++++++++++++++ 26 files changed, 216 insertions(+) create mode 100644 .gitattributes create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ1_M.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ1_S.gguf create 
mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ2_M.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ2_S.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XS.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XXS.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ3_M.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ3_S.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XS.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XXS.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-IQ4_XS.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q2_K.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q2_K_S.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_L.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_M.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_S.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q4_0.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q4_1.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_M.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_S.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_M.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_S.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.i1-Q6_K.gguf create mode 100644 GigaChat-20B-A3B-instruct-bf16.imatrix.gguf create mode 100644 README.md diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..5d16d34 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,59 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs 
merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.imatrix.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text 
+GigaChat-20B-A3B-instruct-bf16.i1-Q2_K_S.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XS.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ2_S.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text +GigaChat-20B-A3B-instruct-bf16.i1-IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ1_M.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ1_M.gguf new file mode 100644 index 0000000..a2e2ae5 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ1_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47d4ae3f4c31f99947f33b6f23878afacb287a9b56628a0dbdc86321c473a03 +size 4730148160 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ1_S.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ1_S.gguf new file mode 100644 index 0000000..3c97b1b --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ1_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22f2b5f9b9b1b4ebf4ac963bc8aa316131e108614d8dd78f7d462b7b1872fb51 +size 4270478656 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_M.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_M.gguf new 
file mode 100644 index 0000000..0b96544 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241aa2f1bb68b6d2ff89b2c5afc4770b415d051591059677ed2b21f69f41ac0f +size 6796494144 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_S.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_S.gguf new file mode 100644 index 0000000..7f536c1 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095a373c71015620fa05d8e37019b96d2504f0158b6559e85ab7953df48ccb6f +size 6183601472 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XS.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XS.gguf new file mode 100644 index 0000000..f1e614b --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b72084c623bf0e9740a2ab8e6cb9b1a877428d8955ddbb33f7e55947a5e3a4 +size 6112826688 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XXS.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XXS.gguf new file mode 100644 index 0000000..2268ab5 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XXS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b58d284903b002c3ff1f300772ebf31159e6d582e831d3874f2cabee6c5066 +size 5496264000 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_M.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_M.gguf new file mode 100644 index 0000000..102ecca --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de686e36a2b5dd201386bdc58805686dcaa33ccc698d147751106afc4ec08699 +size 9060341056 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_S.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_S.gguf new file mode 100644 index 0000000..4aa539d --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_S.gguf @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:e71ac445e6002c15fa1ac6fc920d1ba0e654c7aaa434d1989314da33fe325cd7 +size 8968705344 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XS.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XS.gguf new file mode 100644 index 0000000..337a825 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdb0b2e01eaa19cb93445a9c15066ab8c60556e777118bc39ad47f5aa91f8b8 +size 8483575104 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XXS.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XXS.gguf new file mode 100644 index 0000000..303517a --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XXS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c0f4332839b39ade5225b5475b746b5d757209fa5888b738f8f79d058b69ae +size 7987987776 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-IQ4_XS.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-IQ4_XS.gguf new file mode 100644 index 0000000..85b463c --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-IQ4_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c776fb7b1f1d5fe8de4d090f7cf1e8890998d412262ca88d13dfd329961b271c +size 11032757568 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q2_K.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q2_K.gguf new file mode 100644 index 0000000..4f40484 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ff4113f0a01a62d1b65e19c501dbf8665060d4d329076e5e8be347e043f051b +size 7589741888 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q2_K_S.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q2_K_S.gguf new file mode 100644 index 0000000..2358582 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q2_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d776d75039b42cd72a4c60dd491fb73bcafae9f2e082e2fec1b3e391f8c31094 +size 7025075520 diff --git 
a/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_L.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_L.gguf new file mode 100644 index 0000000..d1827d0 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d520797ee39080f196c8ef3f86320227e4330cfba0301a4cdb017bc2deefaad +size 10707776832 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_M.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_M.gguf new file mode 100644 index 0000000..a40f21b --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa585c7b36219d37606db28cbe18dc85fc249d1a3deeb7ac8a45238bed66ed2 +size 9868784960 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_S.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_S.gguf new file mode 100644 index 0000000..42f5a04 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20a0ca8ee8f252789432482da3a778886ce44644ab6e54d0e7da6f96d21e0aea +size 8968705344 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q4_0.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q4_0.gguf new file mode 100644 index 0000000..b49454e --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89c986ef9267bcc587be268630baa07e60490b6c9ca9f45db294cca3dabd842 +size 11699963200 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q4_1.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q4_1.gguf new file mode 100644 index 0000000..6ad3cc7 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q4_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f153238d52727073af2516177994081c74aac3be6948429c6541848e21ffb0 +size 12938036544 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_M.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_M.gguf new file mode 100644 index 0000000..2975aad --- /dev/null 
+++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:210b64521f1205238d9d32942cbcaabc3214178e9cd0980aec8d8179050119f8 +size 12494808384 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_S.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_S.gguf new file mode 100644 index 0000000..d182d12 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c705b8a0e7fee35a50e7bdbfd3160f7f95c8f03e1453fda11967aa6917e4627a +size 11733124416 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_M.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_M.gguf new file mode 100644 index 0000000..d397e6a --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:871d8b6ed553955db194a4b30c9d5b16b2d839156cbd644dd7307408bcb46681 +size 14634231104 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_S.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_S.gguf new file mode 100644 index 0000000..1b85760 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095985c6d7cae2483492b866c331ada3b6c7bbb3aa889f70ee64e26897fad5e7 +size 14208222528 diff --git a/GigaChat-20B-A3B-instruct-bf16.i1-Q6_K.gguf b/GigaChat-20B-A3B-instruct-bf16.i1-Q6_K.gguf new file mode 100644 index 0000000..be78453 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.i1-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd95c4e5f574c1b1fa7082d349b826497725a20dfed8e781b706b913597ccd6b +size 16907367744 diff --git a/GigaChat-20B-A3B-instruct-bf16.imatrix.gguf b/GigaChat-20B-A3B-instruct-bf16.imatrix.gguf new file mode 100644 index 0000000..87f3730 --- /dev/null +++ b/GigaChat-20B-A3B-instruct-bf16.imatrix.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:03e7d80b6eda29ae7a8a079598b166d880b72184eaae6e04c1c379166de68f56 +size 42807904 diff --git a/README.md b/README.md new file mode 100644 index 0000000..f69741f --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +--- +base_model: ai-sage/GigaChat-20B-A3B-instruct-bf16 +language: +- ru +- en +library_name: transformers +license: mit +mradermacher: + readme_rev: 1 +quantized_by: mradermacher +--- +## About + + + + + + + + + +weighted/imatrix quants of https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct-bf16 + + + +***For a convenient overview and download list, visit our [model page for this model](https://hf.tst.eu/model#GigaChat-20B-A3B-instruct-bf16-i1-GGUF).*** + +static quants are available at https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-GGUF +## Usage + +If you are unsure how to use GGUF files, refer to one of [TheBloke's +READMEs](https://huggingface.co/TheBloke/KafkaLM-70B-German-V0.1-GGUF) for +more details, including on how to concatenate multi-part files. + +## Provided Quants + +(sorted by size, not necessarily quality. 
IQ-quants are often preferable over similar sized non-IQ quants) + +| Link | Type | Size/GB | Notes | +|:-----|:-----|--------:|:------| +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.imatrix.gguf) | imatrix | 0.1 | imatrix file (for creating your own quants) | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ1_S.gguf) | i1-IQ1_S | 4.4 | for the desperate | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ1_M.gguf) | i1-IQ1_M | 4.8 | mostly desperate | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XXS.gguf) | i1-IQ2_XXS | 5.6 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_XS.gguf) | i1-IQ2_XS | 6.2 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_S.gguf) | i1-IQ2_S | 6.3 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ2_M.gguf) | i1-IQ2_M | 6.9 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q2_K_S.gguf) | i1-Q2_K_S | 7.1 | very low quality | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q2_K.gguf) | i1-Q2_K | 7.7 | IQ3_XXS probably better | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XXS.gguf) | i1-IQ3_XXS | 8.1 | lower quality | +| 
[GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_XS.gguf) | i1-IQ3_XS | 8.6 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_S.gguf) | i1-IQ3_S | 9.1 | beats Q3_K* | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_S.gguf) | i1-Q3_K_S | 9.1 | IQ3_XS probably better | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ3_M.gguf) | i1-IQ3_M | 9.2 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_M.gguf) | i1-Q3_K_M | 10.0 | IQ3_S probably better | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q3_K_L.gguf) | i1-Q3_K_L | 10.8 | IQ3_M probably better | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-IQ4_XS.gguf) | i1-IQ4_XS | 11.1 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q4_0.gguf) | i1-Q4_0 | 11.8 | fast, low quality | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_S.gguf) | i1-Q4_K_S | 11.8 | optimal size/speed/quality | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q4_K_M.gguf) | i1-Q4_K_M | 12.6 | fast, recommended | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q4_1.gguf) | i1-Q4_1 | 13.0 | | +| 
[GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_S.gguf) | i1-Q5_K_S | 14.3 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q5_K_M.gguf) | i1-Q5_K_M | 14.7 | | +| [GGUF](https://huggingface.co/mradermacher/GigaChat-20B-A3B-instruct-bf16-i1-GGUF/resolve/main/GigaChat-20B-A3B-instruct-bf16.i1-Q6_K.gguf) | i1-Q6_K | 17.0 | practically like static Q6_K | + +Here is a handy graph by ikawrakow comparing some lower-quality quant +types (lower is better): + +![image.png](https://www.nethype.de/huggingface_embed/quantpplgraph.png) + +And here are Artefact2's thoughts on the matter: +https://gist.github.com/Artefact2/b5f810600771265fc1e39442288e8ec9 + +## FAQ / Model Request + +See https://huggingface.co/mradermacher/model_requests for answers to +questions you might have, or to request that another model be quantized. + +## Thanks + +I thank my company, [nethype GmbH](https://www.nethype.de/), for letting +me use its servers and providing upgrades to my workstation to enable +this work in my free time. Additional thanks to [@nicoboss](https://huggingface.co/nicoboss) for giving me access to his private supercomputer, enabling me to provide many more imatrix quants, at much higher quality, than I would otherwise be able to. + +