commit 0e82ae105f28267c7397403937668358bc57d998 Author: ModelHub XC Date: Sun May 17 22:16:31 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: EleutherAI/llemma_7b_muinstruct_camelmath Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..53d7257 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,47 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* 
filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..d967def --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +--- +license: apache-2.0 +datasets: +- EleutherAI/muInstruct +- camel-ai/math +language: +- en +tags: +- math +--- + +`llemma_7b_muinstruct_camelmath` is an instruction-following finetune of [Llemma 7B](https://huggingface.co/EleutherAI/llemma_7b), trained on the [μInstruct](https://huggingface.co/datasets/EleutherAI/muInstruct) and [camel-ai/math](https://huggingface.co/datasets/camel-ai/math) datasets. + +## Input Formatting +Format input queries as follows: +``` +input_text = f"Input:{input}\n\nResponse:" +``` + +Note that due to an error during training, this model's end-of-sequence token ID is `0` instead of the `2` which is standard for Llama-2 based models. Inference APIs should handle this automatically by reading this repo's `config.json`, but be aware of this difference if you are doing token surgery. + +## Evals + +`llemma_7b_muinstruct_camelmath` compares favorably to other 7B parameter models on the [Hungarian Math Exam](https://huggingface.co/datasets/keirp/hungarian_national_hs_finals_exam/blob/main/README.md). It surpasses the few-shot performance of Llemma 7B whilst being the strongest Llama-2 7B based model. 
+ +| Model | Exam Score | +| ------------------------------------------------------------------------------ | ---------- | +| [Code Llama 7B](https://huggingface.co/codellama/CodeLlama-7b-hf) (few-shot) | 8\% | +| [MetaMath 7B](https://huggingface.co/meta-math/MetaMath-7B-V1.0) | 20\% | +| [MAmmoTH 7B](https://huggingface.co/TIGER-Lab/MAmmoTH-7B) | 17\% | +| [MAmmoTH Coder 7B](https://huggingface.co/TIGER-Lab/MAmmoTH-Coder-7B) | 11\% | +| [Llemma 7B](https://huggingface.co/EleutherAI/llemma_7b) (few-shot) | 23\% | +| Llemma_7B_muinstruct_camelmath | 25\% | +| - | - | +| [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) (few-shot) | 22\% | +| [MetaMath Mistral 7B](https://huggingface.co/meta-math/MetaMath-Mistral-7B) | 29\% | +| [OpenChat 3.5](https://huggingface.co/openchat/openchat_3.5) | 37\% | + + diff --git a/config.json b/config.json new file mode 100644 index 0000000..3a2c059 --- /dev/null +++ b/config.json @@ -0,0 +1,22 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 32016 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..69da086 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 0, + "pad_token_id": 0, + "transformers_version": 
"4.30.2" +} diff --git a/pytorch_model-00001-of-00003.bin b/pytorch_model-00001-of-00003.bin new file mode 100644 index 0000000..52b9838 --- /dev/null +++ b/pytorch_model-00001-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf73fc8b424994dedf1ce08093e2ee6838541affca66c4b76ceae68ba5dcf3cf +size 9878251730 diff --git a/pytorch_model-00002-of-00003.bin b/pytorch_model-00002-of-00003.bin new file mode 100644 index 0000000..f91feab --- /dev/null +++ b/pytorch_model-00002-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdadc9c8173dd2679336b70ef5b64d311ae19b6d388d260b202b556926550f43 +size 9894801014 diff --git a/pytorch_model-00003-of-00003.bin b/pytorch_model-00003-of-00003.bin new file mode 100644 index 0000000..f43d2ab --- /dev/null +++ b/pytorch_model-00003-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7416526721b63be3687430df31a743a306f86e73d4120d016cb03e5d79a153b +size 7181252793 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..b8d25be --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a799c899ddda20ec6d08e8806507f1fd296809ea0fbcbf46e253798ac2861f +size 26788 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..f6722e8 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058