commit 54169f2c87085fdeaef105800b9ab230f3092012 Author: ModelHub XC Date: Sat Jun 6 22:55:21 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: mradermacher/LLama-Rhino-8B-RAG-i1-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4cb7f74 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,60 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +imatrix.dat filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q2_K.gguf filter=lfs diff=lfs 
merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q2_K_S.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ2_XS.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ2_S.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text +LLama-Rhino-8B-RAG.i1-IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/LLama-Rhino-8B-RAG.i1-IQ1_M.gguf b/LLama-Rhino-8B-RAG.i1-IQ1_M.gguf new file mode 100644 index 0000000..1a1942a --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ1_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef07dc32125d5676d20aa7f2d7774d59e9614a727152dce64041660684010d5 +size 2161977568 diff --git 
a/LLama-Rhino-8B-RAG.i1-IQ1_S.gguf b/LLama-Rhino-8B-RAG.i1-IQ1_S.gguf new file mode 100644 index 0000000..d3baf20 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ1_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4070b269729b88385ac8bb980f0399602494d2be9f2a3a798980ac99ecca886c +size 2019633376 diff --git a/LLama-Rhino-8B-RAG.i1-IQ2_M.gguf b/LLama-Rhino-8B-RAG.i1-IQ2_M.gguf new file mode 100644 index 0000000..33bc404 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ2_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1411b6783d15fe21e55c7199d7228268a07fae6e88e8f683b9a8e1cae9317547 +size 2948286688 diff --git a/LLama-Rhino-8B-RAG.i1-IQ2_S.gguf b/LLama-Rhino-8B-RAG.i1-IQ2_S.gguf new file mode 100644 index 0000000..5676df5 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ2_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4e94ee1e8546f1c5abc97ff1765adc17cd53254ac7727d456c41b14c471835 +size 2758494432 diff --git a/LLama-Rhino-8B-RAG.i1-IQ2_XS.gguf b/LLama-Rhino-8B-RAG.i1-IQ2_XS.gguf new file mode 100644 index 0000000..8cfa32d --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ2_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ac34e989f43e8dca8f488e5fdf72b1ca9584aad632868e3292c4e38c55ae1e +size 2605787360 diff --git a/LLama-Rhino-8B-RAG.i1-IQ2_XXS.gguf b/LLama-Rhino-8B-RAG.i1-IQ2_XXS.gguf new file mode 100644 index 0000000..d37bd66 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ2_XXS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ad4ea61ccd03363e08e2701319ee6ef7946836e587381133ecc493524b58df1 +size 2399217888 diff --git a/LLama-Rhino-8B-RAG.i1-IQ3_M.gguf b/LLama-Rhino-8B-RAG.i1-IQ3_M.gguf new file mode 100644 index 0000000..d8aafb2 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ3_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e83b64235171b45e64da87c8e513b8227467607676ba030349d99893ca4723a +size 3784829152 diff 
--git a/LLama-Rhino-8B-RAG.i1-IQ3_S.gguf b/LLama-Rhino-8B-RAG.i1-IQ3_S.gguf new file mode 100644 index 0000000..0769d02 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ3_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8e733b64201a83c9af5f59dfa97b2bab5c269edac531538e23f0da2c08f3ff +size 3682330848 diff --git a/LLama-Rhino-8B-RAG.i1-IQ3_XS.gguf b/LLama-Rhino-8B-RAG.i1-IQ3_XS.gguf new file mode 100644 index 0000000..a9d2ee1 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ3_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7161a3c0b30de47a6eca76f6c27f16214911016530beaa79d87c7a65be8ff5a1 +size 3518752992 diff --git a/LLama-Rhino-8B-RAG.i1-IQ3_XXS.gguf b/LLama-Rhino-8B-RAG.i1-IQ3_XXS.gguf new file mode 100644 index 0000000..bc07973 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ3_XXS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c248fb392c934efa8145b08f2591bc9aa326b699d34b4f6f5370423d337509 +size 3274918112 diff --git a/LLama-Rhino-8B-RAG.i1-IQ4_NL.gguf b/LLama-Rhino-8B-RAG.i1-IQ4_NL.gguf new file mode 100644 index 0000000..47251e4 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ4_NL.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f727600305e15548ac0a5e774d53ecd896e666abd2766a8609d55f4000cd78f5 +size 4677994720 diff --git a/LLama-Rhino-8B-RAG.i1-IQ4_XS.gguf b/LLama-Rhino-8B-RAG.i1-IQ4_XS.gguf new file mode 100644 index 0000000..700c5dd --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-IQ4_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6b488a1fdc01a6363de2a8c4751fa4ca039d67f6f2b4297689dd8b48b50862 +size 4447668448 diff --git a/LLama-Rhino-8B-RAG.i1-Q2_K.gguf b/LLama-Rhino-8B-RAG.i1-Q2_K.gguf new file mode 100644 index 0000000..c752ced --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be513053e217e1f0f4f5d1340945a154c577407609847f4fe2a167b4c595be33 +size 3179137248 
diff --git a/LLama-Rhino-8B-RAG.i1-Q2_K_S.gguf b/LLama-Rhino-8B-RAG.i1-Q2_K_S.gguf new file mode 100644 index 0000000..98d0295 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q2_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802f7f1f99d7667f5227ada1a5eff10cc2f596a3ea64941285c76f69d3698d26 +size 2988820704 diff --git a/LLama-Rhino-8B-RAG.i1-Q3_K_L.gguf b/LLama-Rhino-8B-RAG.i1-Q3_K_L.gguf new file mode 100644 index 0000000..87d9ebb --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12c6d67f0eb7f24ac5737b4930ac267124ea11ba75e058ee7d96c3ad3206e38e +size 4321962208 diff --git a/LLama-Rhino-8B-RAG.i1-Q3_K_M.gguf b/LLama-Rhino-8B-RAG.i1-Q3_K_M.gguf new file mode 100644 index 0000000..d664142 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56b311ce28d9ba4d55ed52d8a831ef1e6e46ba6c5415c8700179b86418cc312 +size 4018923744 diff --git a/LLama-Rhino-8B-RAG.i1-Q3_K_S.gguf b/LLama-Rhino-8B-RAG.i1-Q3_K_S.gguf new file mode 100644 index 0000000..9449a3c --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dfc1f76d217003b38559911e0761ce9c8acec57bd0f874b238b8e5f0e17b602 +size 3664505056 diff --git a/LLama-Rhino-8B-RAG.i1-Q4_0.gguf b/LLama-Rhino-8B-RAG.i1-Q4_0.gguf new file mode 100644 index 0000000..fad1b34 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87039ef5f3b4cc42c317c2d53ab151b4492f5b0fafbee0bc0d1b10cf0b4526a9 +size 4675897568 diff --git a/LLama-Rhino-8B-RAG.i1-Q4_1.gguf b/LLama-Rhino-8B-RAG.i1-Q4_1.gguf new file mode 100644 index 0000000..7da3ed3 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q4_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a949b87dc0b1c53f8ef9dc38e07869b86d7613430dcf428eb9d3f998f55c8b +size 5130258656 
diff --git a/LLama-Rhino-8B-RAG.i1-Q4_K_M.gguf b/LLama-Rhino-8B-RAG.i1-Q4_K_M.gguf new file mode 100644 index 0000000..baff721 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d286db713bec60872d91a71a0638efb00af589735843be8809d76298a9763b +size 4920740064 diff --git a/LLama-Rhino-8B-RAG.i1-Q4_K_S.gguf b/LLama-Rhino-8B-RAG.i1-Q4_K_S.gguf new file mode 100644 index 0000000..92913d5 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:070afa2792c40e30a19e5778261c2422c0f33eb7f07fedfa8d2cc7c4d4b9a7ef +size 4692674784 diff --git a/LLama-Rhino-8B-RAG.i1-Q5_K_M.gguf b/LLama-Rhino-8B-RAG.i1-Q5_K_M.gguf new file mode 100644 index 0000000..ef5c546 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a040d9e6b917f2986a5f7419583cae857eeee70404eb155480f603939df56fb4 +size 5732993248 diff --git a/LLama-Rhino-8B-RAG.i1-Q5_K_S.gguf b/LLama-Rhino-8B-RAG.i1-Q5_K_S.gguf new file mode 100644 index 0000000..03c3f97 --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54469dfdf4d6fc2593244873066adff0249624804280e3477b19afe165d4f89e +size 5599299808 diff --git a/LLama-Rhino-8B-RAG.i1-Q6_K.gguf b/LLama-Rhino-8B-RAG.i1-Q6_K.gguf new file mode 100644 index 0000000..54cee1b --- /dev/null +++ b/LLama-Rhino-8B-RAG.i1-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be93d2daf2a2144fbd7462e131215fac00a0f59b85561c8be224fd84a320fc6 +size 6596012256 diff --git a/README.md b/README.md new file mode 100644 index 0000000..a7cafb5 --- /dev/null +++ b/README.md @@ -0,0 +1,83 @@ +--- +base_model: QomSSLab/LLama-Rhino-8B-RAG +language: +- en +library_name: transformers +license: apache-2.0 +quantized_by: mradermacher +tags: +- text-generation-inference +- transformers +- unsloth 
+- llama +- trl +- sft +--- +## About + + + + + + +weighted/imatrix quants of https://huggingface.co/QomSSLab/LLama-Rhino-8B-RAG + + +static quants are available at https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-GGUF +## Usage + +If you are unsure how to use GGUF files, refer to one of [TheBloke's +READMEs](https://huggingface.co/TheBloke/KafkaLM-70B-German-V0.1-GGUF) for +more details, including how to concatenate multi-part files. + +## Provided Quants + +(sorted by size, not necessarily quality. IQ-quants are often preferable over similar-sized non-IQ quants) + +| Link | Type | Size/GB | Notes | +|:-----|:-----|--------:|:------| +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ1_S.gguf) | i1-IQ1_S | 2.1 | for the desperate | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ1_M.gguf) | i1-IQ1_M | 2.3 | mostly desperate | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ2_XXS.gguf) | i1-IQ2_XXS | 2.5 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ2_XS.gguf) | i1-IQ2_XS | 2.7 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ2_S.gguf) | i1-IQ2_S | 2.9 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ2_M.gguf) | i1-IQ2_M | 3.0 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q2_K_S.gguf) | i1-Q2_K_S | 3.1 | very low quality | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q2_K.gguf) | i1-Q2_K | 3.3 | IQ3_XXS probably better | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ3_XXS.gguf) | i1-IQ3_XXS | 
3.4 | lower quality | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ3_XS.gguf) | i1-IQ3_XS | 3.6 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q3_K_S.gguf) | i1-Q3_K_S | 3.8 | IQ3_XS probably better | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ3_S.gguf) | i1-IQ3_S | 3.8 | beats Q3_K* | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ3_M.gguf) | i1-IQ3_M | 3.9 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q3_K_M.gguf) | i1-Q3_K_M | 4.1 | IQ3_S probably better | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q3_K_L.gguf) | i1-Q3_K_L | 4.4 | IQ3_M probably better | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ4_XS.gguf) | i1-IQ4_XS | 4.5 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q4_0.gguf) | i1-Q4_0 | 4.8 | fast, low quality | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-IQ4_NL.gguf) | i1-IQ4_NL | 4.8 | prefer IQ4_XS | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q4_K_S.gguf) | i1-Q4_K_S | 4.8 | optimal size/speed/quality | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q4_K_M.gguf) | i1-Q4_K_M | 5.0 | fast, recommended | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q4_1.gguf) | i1-Q4_1 | 5.2 | | +| 
[GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q5_K_S.gguf) | i1-Q5_K_S | 5.7 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q5_K_M.gguf) | i1-Q5_K_M | 5.8 | | +| [GGUF](https://huggingface.co/mradermacher/LLama-Rhino-8B-RAG-i1-GGUF/resolve/main/LLama-Rhino-8B-RAG.i1-Q6_K.gguf) | i1-Q6_K | 6.7 | practically like static Q6_K | + +Here is a handy graph by ikawrakow comparing some lower-quality quant +types (lower is better): + +![image.png](https://www.nethype.de/huggingface_embed/quantpplgraph.png) + +And here are Artefact2's thoughts on the matter: +https://gist.github.com/Artefact2/b5f810600771265fc1e39442288e8ec9 + +## FAQ / Model Request + +See https://huggingface.co/mradermacher/model_requests for answers to +questions you might have, or to request that another model be quantized. + +## Thanks + +I thank my company, [nethype GmbH](https://www.nethype.de/), for letting +me use its servers and providing upgrades to my workstation to enable +this work in my free time. Additional thanks to [@nicoboss](https://huggingface.co/nicoboss) for giving me access to his private supercomputer, enabling me to provide many more imatrix quants, at much higher quality, than I would otherwise be able to. + + diff --git a/imatrix.dat b/imatrix.dat new file mode 100644 index 0000000..cd7b550 --- /dev/null +++ b/imatrix.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b87ef5a186cf23ed53bc6177672a7a0f7717ecaadeac3301de3ba162f646a22 +size 4988157