初始化项目，由ModelHub XC社区提供模型

Model: andrijdavid/macaroni-7b Source: Original Platform
2026-06-15 02:54:17 +08:00
commit 45cf3cca4c
16 changed files with 91438 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,35 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,145 @@
 ---
 language:
 - en
 license: apache-2.0
 tags:
 - mistral
 - merge
 model-index:
 - name: macaroni-7b
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 73.12
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=andrijdavid/macaroni-7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 88.17
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=andrijdavid/macaroni-7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 64.58
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=andrijdavid/macaroni-7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 68.76
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=andrijdavid/macaroni-7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 84.37
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=andrijdavid/macaroni-7b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 68.61
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=andrijdavid/macaroni-7b
      name: Open LLM Leaderboard
 ---
 # Macaroni 7B
 This is an experimental merge of pre-trained Mistral language models with fblgit/UNA-TheBeagle-7b-v1.
 # Disclaimer
  * No Warranty: The Model is provided on an "AS IS" basis, without warranty of any kind. The entire risk as to the quality, performance and use of The Model is with the user.
  * Limitation of Liability: In no event shall the creator(s) of The Model be liable for any claim, damages, or other liability, whether in an action of contract, tort or otherwise, arising from, out of, or in connection with The Model or the use or other dealings in The Model.
  * Accuracy and Risks: The creator(s) do not warrant that The Model is free from errors or inaccuracies and disclaim any responsibility for any harm resulting from the use of The Model.
  * Use at Your Own Risk: Users are solely responsible for any consequences resulting from the use of The Model, including but not limited to any changes made to The Model by the user or the results produced by The Model.
  * Compliance with Laws: Users are solely responsible for ensuring that their use of The Model complies with all applicable laws, regulations, and policies.
  * Ethical Use: Users are encouraged to use The Model ethically and responsibly. The creator(s) disclaim any responsibility for misuse or unethical use of The Model.
  * Modifications: Any modifications made to The Model by third parties are the sole responsibility of the party making the modifications. The original creator(s) of The Model shall not be responsible for any modifications made by third parties.
 # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_andrijdavid__macaroni-7b).
 |             Metric              |Value|
 |---------------------------------|----:|
 |Avg.                             |74.60|
 |AI2 Reasoning Challenge (25-Shot)|73.12|
 |HellaSwag (10-Shot)              |88.17|
 |MMLU (5-Shot)                    |64.58|
 |TruthfulQA (0-shot)              |68.76|
 |Winogrande (5-shot)              |84.37|
 |GSM8k (5-shot)                   |68.61|
--- a/config.json
+++ b/config.json
@@ -0,0 +1,28 @@
 {
  "_name_or_path": "udkai/Turdus",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.35.2",
  "unsloth_version": "2024.1",
  "use_cache": true,
  "vocab_size": 32000
 }
--- a/model-00001-of-00008.safetensors
+++ b/model-00001-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:de83a7c783e49cb79787946afd57d779f21d0f6c48a26c8fbf3c36d0ea458f4f
 size 1889595352
--- a/model-00002-of-00008.safetensors
+++ b/model-00002-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:64c0997a6f4b23d375f2dc38e09e7d6a81a35538689b27cb31baf951ead1239b
 size 1979781416
--- a/model-00003-of-00008.safetensors
+++ b/model-00003-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f9bb8baa123900da3ad8532d54b7293ee0e281c5a27556b3ab966032581c3d26
 size 1988195080
--- a/model-00004-of-00008.safetensors
+++ b/model-00004-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:50f64ffb5a5070a9b186f0a97f1482f38abdae698068179b73a613fd18e16294
 size 1937846944
--- a/model-00005-of-00008.safetensors
+++ b/model-00005-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:3576691521c955258bc6f5a727c0c4110b24f247e303c938056e3ce618488e88
 size 1988178496
--- a/model-00006-of-00008.safetensors
+++ b/model-00006-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:b0be9c7e3d600f19a7310803cac11f1faa479d0f1a99449b281101be7e7cf4e7
 size 1998655576
--- a/model-00007-of-00008.safetensors
+++ b/model-00007-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:e696588f5849f16ecc54fc139ca2ff441cbd5ab51e5ea2125b0cc6a2ea73ab38
 size 1946235664
--- a/model-00008-of-00008.safetensors
+++ b/model-00008-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:bdf8fb5456822796dc3d57792985e932194d238c2e35d6932b9fa1c3b0028f46
 size 755009456
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,35 @@
 {
  "additional_special_tokens": [
    "<unk>",
    "<s>",
    "</s>"
  ],
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer.model
+++ b/tokenizer.model
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,45 @@
 {
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<unk>",
    "<s>",
    "</s>"
  ],
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": true,
  "model_max_length": 255,
  "pad_token": "<unk>",
  "padding_side": "right",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": true
 }