commit f154a0d5435a5e68893c308c2c3ce90f8094a5d8 Author: ModelHub XC Date: Wed May 27 00:24:18 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: hfl/chinese-llama-2-7b-gguf Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e3289ed --- /dev/null +++ b/.gitattributes @@ -0,0 +1,49 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +ggml-model-q2_k.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q3_k.gguf filter=lfs diff=lfs merge=lfs 
-text +ggml-model-q4_k.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q5_k.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q6_k.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q4_0.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q5_0.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q8_0.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-f16.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q2_k-im.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q3_k-im.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q4_k-im.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q5_k-im.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q6_k-im.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..f44294f --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +--- +license: apache-2.0 +language: +- zh +- en +--- + +# Chinese-LLaMA-2-7B-GGUF + +This repository contains the GGUF-v3 models (llama.cpp compatible) for **Chinese-LLaMA-2-7B**. + + +## Performance + +Metric: PPL, lower is better + +| Quant | original | imatrix (`-im`) | +|-----|------|------| +| Q2_K | 15.1160 +/- 0.30469 | 12.7682 +/- 0.26022 | +| Q3_K | 9.9588 +/- 0.20549 | 9.8508 +/- 0.20484 | +| Q4_0 | 9.8085 +/- 0.20350 | - | +| Q4_K | 9.5802 +/- 0.20015 | 9.6327 +/- 0.20219 | +| Q5_0 | 9.4783 +/- 0.19622 | - | +| Q5_K | 9.5132 +/- 0.19989 | 9.4447 +/- 0.19772 | +| Q6_K | 9.4640 +/- 0.19909 | 9.4507 +/- 0.19849 | +| Q8_0 | 9.4659 +/- 0.19927 | - | +| F16 | 9.4627 +/- 0.19921 | - | + +*Models with the `-im` suffix are generated with an importance matrix, which generally gives better performance (though not always).* + + +## Others + + +For the Hugging Face version, please see: https://huggingface.co/hfl/chinese-llama-2-7b + +Please refer to [https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/) for more details. 
\ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..494a783 --- /dev/null +++ b/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.31.0", + "use_cache": true, + "vocab_size": 55296 +} \ No newline at end of file diff --git a/ggml-model-f16.gguf b/ggml-model-f16.gguf new file mode 100644 index 0000000..ce866e7 --- /dev/null +++ b/ggml-model-f16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3421ff6f8e7e1f8dae3d65c9fd1f4b50f1a834931a56ea9ab6a3a83124d430be +size 13860294208 diff --git a/ggml-model-q2_k-im.gguf b/ggml-model-q2_k-im.gguf new file mode 100644 index 0000000..6c337b7 --- /dev/null +++ b/ggml-model-q2_k-im.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5816077398c72280c3796f65dc7893f5b76c76d3ab2d8b634063ac2578ca1f08 +size 2642955904 diff --git a/ggml-model-q2_k.gguf b/ggml-model-q2_k.gguf new file mode 100644 index 0000000..6292708 --- /dev/null +++ b/ggml-model-q2_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2630ebdf528d939a0da3ee2f75df3d946fc96f56c4328da717de771ebac980f1 +size 2936032800 diff --git a/ggml-model-q3_k-im.gguf b/ggml-model-q3_k-im.gguf new file mode 100644 index 0000000..424a1ef --- /dev/null +++ b/ggml-model-q3_k-im.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08c20f23a61d6ffe1d5797dfcc01b37af5ff1ada676adf9d942a023a11d6095 +size 3417788032 diff --git a/ggml-model-q3_k.gguf 
b/ggml-model-q3_k.gguf new file mode 100644 index 0000000..d613692 --- /dev/null +++ b/ggml-model-q3_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7446f1eff1c42465ca020dd37bd64a52ac3de2ad663017d9ea8d02ebec667074 +size 3417787936 diff --git a/ggml-model-q4_0.gguf b/ggml-model-q4_0.gguf new file mode 100644 index 0000000..2a07460 --- /dev/null +++ b/ggml-model-q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e10e4cbb10b677d4d887efe56c7b0e582c3164fbb1a8ed565ebce4e0fc91019 +size 3958263328 diff --git a/ggml-model-q4_k-im.gguf b/ggml-model-q4_k-im.gguf new file mode 100644 index 0000000..13f518b --- /dev/null +++ b/ggml-model-q4_k-im.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32ac0987daec9be049c864d4995ff15a3e2d344fd517c8685bab9f26aad26fad +size 4213460608 diff --git a/ggml-model-q4_k.gguf b/ggml-model-q4_k.gguf new file mode 100644 index 0000000..32c8fbe --- /dev/null +++ b/ggml-model-q4_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c34b8017d27234882d20f68dc4fde4d8f57c0b4f0a121cf33990763e6dcabb6 +size 4213460512 diff --git a/ggml-model-q5_0.gguf b/ggml-model-q5_0.gguf new file mode 100644 index 0000000..8c75ff1 --- /dev/null +++ b/ggml-model-q5_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30527dd87012b0588ba5c88c6ebe637098387c30d5587283fec1082f5f5907c +size 4796075552 diff --git a/ggml-model-q5_k-im.gguf b/ggml-model-q5_k-im.gguf new file mode 100644 index 0000000..f91f5e2 --- /dev/null +++ b/ggml-model-q5_k-im.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c62fc1849d48728a09e58e7cfde8bbb096c3f833f553f93bb788d48793b1b5 +size 4927540864 diff --git a/ggml-model-q5_k.gguf b/ggml-model-q5_k.gguf new file mode 100644 index 0000000..ca2d2b9 --- /dev/null +++ b/ggml-model-q5_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1f029ba01d7bd63822a98e23e98e25e3ff88f3aeb3328daf096121140b5d2b63 +size 4927540768 diff --git a/ggml-model-q6_k-im.gguf b/ggml-model-q6_k-im.gguf new file mode 100644 index 0000000..5072486 --- /dev/null +++ b/ggml-model-q6_k-im.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0959711faaf5d01aabc041328f1327020a8a194fb3724719ead1b85dec3ce3b9 +size 5686251136 diff --git a/ggml-model-q6_k.gguf b/ggml-model-q6_k.gguf new file mode 100644 index 0000000..8c80d3a --- /dev/null +++ b/ggml-model-q6_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ab381a4f6411b3256cd7715f9ef9fdaa28bdb492f318e585c4dbd68e9609bc +size 5686251040 diff --git a/ggml-model-q8_0.gguf b/ggml-model-q8_0.gguf new file mode 100644 index 0000000..1e50b30 --- /dev/null +++ b/ggml-model-q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ccd24f7095867eeae6598a688f5f6a957baa213dd2a36238eda233240bc801e +size 7364365856