From eacc9f71a141d789268623f8847adfe17e32c83c Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 1 Jun 2026 05:10:15 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: agentlans/Gemma2-9B-AdvancedFuse Source: Original Platform --- .gitattributes | 35 +++++++ README.md | 151 +++++++++++++++++++++++++++++++ config.json | 34 +++++++ generation_config.json | 3 + model-00001-of-00004.safetensors | 3 + model-00002-of-00004.safetensors | 3 + model-00003-of-00004.safetensors | 3 + model-00004-of-00004.safetensors | 3 + model.safetensors.index.json | 3 + special_tokens_map.json | 3 + tokenizer.json | 3 + tokenizer.model | 3 + tokenizer_config.json | 3 + 13 files changed, 250 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00004.safetensors create mode 100644 model-00002-of-00004.safetensors create mode 100644 model-00003-of-00004.safetensors create mode 100644 model-00004-of-00004.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs 
-text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..568a8ed --- /dev/null +++ b/README.md @@ -0,0 +1,151 @@ +--- +datasets: +- agentlans/crash-course +base_model: +- google/gemma-2-9b-it +- FuseAI/FuseChat-Gemma-2-9B-Instruct +- jsgreenawalt/gemma-2-9B-it-advanced-v2.1 +tags: +- gemma2 +language: +- en +pipeline_tag: text-generation +license: gemma +model-index: +- name: Gemma2-9B-AdvancedFuse + results: + - task: + type: text-generation + name: Text Generation + dataset: + name: IFEval (0-Shot) + type: wis-k/instruction-following-eval + split: train + args: + num_few_shot: 0 + metrics: + - type: inst_level_strict_acc and prompt_level_strict_acc + value: 15.43 + name: averaged accuracy + source: + url: 
https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/?search=agentlans%2FGemma2-9B-AdvancedFuse + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: BBH (3-Shot) + type: SaylorTwift/bbh + split: test + args: + num_few_shot: 3 + metrics: + - type: acc_norm + value: 40.52 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/?search=agentlans%2FGemma2-9B-AdvancedFuse + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MATH Lvl 5 (4-Shot) + type: lighteval/MATH-Hard + split: test + args: + num_few_shot: 4 + metrics: + - type: exact_match + value: 7.55 + name: exact match + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/?search=agentlans%2FGemma2-9B-AdvancedFuse + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GPQA (0-shot) + type: Idavidrein/gpqa + split: train + args: + num_few_shot: 0 + metrics: + - type: acc_norm + value: 11.3 + name: acc_norm + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/?search=agentlans%2FGemma2-9B-AdvancedFuse + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MuSR (0-shot) + type: TAUR-Lab/MuSR + args: + num_few_shot: 0 + metrics: + - type: acc_norm + value: 11.99 + name: acc_norm + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/?search=agentlans%2FGemma2-9B-AdvancedFuse + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU-PRO (5-shot) + type: TIGER-Lab/MMLU-Pro + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 33.34 + name: accuracy + source: + url: 
https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/?search=agentlans%2FGemma2-9B-AdvancedFuse + name: Open LLM Leaderboard +--- +# Gemma2-9B-AdvancedFuse + +Gemma2-9B-AdvancedFuse is an experimental, open-source large language model (LLM) with 9 billion parameters. +It aims to combine the strengths of [FuseAI/FuseChat-Gemma-2-9B-Instruct](https://huggingface.co/fuseai/fusechat-gemma-2-9b-instruct) and +[jsgreenawalt/gemma-2-9B-it-advanced-v2.1](https://huggingface.co/jsgreenawalt/gemma-2-9b-it-advanced-v2.1) through additive linear merging, +further fine-tuned on a 12K-row dataset from [agentlans/crash-course](https://huggingface.co/datasets/agentlans/crash-course) +for enhanced chat and instruct performance, including math and multilingual prompts. + +## Capabilities +- **Text Generation:** Generates coherent emails, summaries, and notes. This model card was primarily generated by the model itself. +- **Instruction Following:** Demonstrates strong ability to understand and execute instructions in conversational settings. +- **Roleplaying:** Can engage in third-person narrative roleplay but may exhibit common GPT expressions or clichés. + +## Limitations +As with most large language models: +- **Factual Errors:** May generate incorrect or outdated information due to data biases. +- **Mathematical Operations:** Struggles with mathematical calculations requiring symbolic reasoning despite its fine-tuning data. +- **Handling Unsafe Input:** May generate unsafe, biased, or malicious content if provided inappropriate input. Careful prompt engineering is recommended. + +## Model Usage Guidelines +1. Use clear and specific instructions for optimal performance. +2. Verify generated outputs for factual accuracy when critical information is involved. +3. Avoid providing inputs that could lead to harmful or unethical responses. +4. Consider using human review, especially in high-stakes applications. 
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/agentlans__Gemma2-9B-AdvancedFuse-details)! +Summarized results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/contents/viewer/default/train?q=agentlans%2FGemma2-9B-AdvancedFuse&sort[column]=Average%20%E2%AC%86%EF%B8%8F&sort[direction]=desc)! + +| Metric |Value (%)| +|-------------------|--------:| +|**Average** | 20.02| +|IFEval (0-Shot) | 15.43| +|BBH (3-Shot) | 40.52| +|MATH Lvl 5 (4-Shot)| 7.55| +|GPQA (0-shot) | 11.30| +|MuSR (0-shot) | 11.99| +|MMLU-PRO (5-shot) | 33.34| + diff --git a/config.json b/config.json new file mode 100644 index 0000000..977b52e --- /dev/null +++ b/config.json @@ -0,0 +1,34 @@ +{ + "_name_or_path": "/drive2/Gemma2-9B-FuseAdvanced", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 16, + "num_hidden_layers": 42, + "num_key_value_heads": 8, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "sliding_window_size": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 256000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..047d972 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:63a0bfb1acde23d1a75dae36d7c572b1c31052bf72f5246e03fa77340ba1c995 +size 168 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..1b3d6e2 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb96c2e305d6fa3b1d72a7d6cc179ac94f0e072a3e8040fc3cb4ef3bea66ddb2 +size 4903351912 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..cedd983 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24679cc39ddcc191f97bce422f38bf538dc321d90f6a545826e72074399ce9f9 +size 4947570872 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..6eeca8d --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f8f5ca444aaee607c4119e1d4c84409e740de42e426e9449db7d8bb7675f971 +size 4962221464 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..971f868 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e89fbf9386e935f1db8a4e22df9eed0ed68866cc5c7f4ac3bf05d07b201613 +size 3670322200 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..3a07064 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38b39b80b2d7aa422464a9b816af4c30d2177e471b89e5325df1336a23ad284 +size 39072 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..1460f70 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:baec30ea10906f16adb8c18af7a34023002c1746542612b8b41c9f09e1351351 +size 636 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..a4a305d --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..796efe9 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..2ffdf58 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa0044b495f59bebab6dc18effe77f39be9398a22d34c1009c26955a2a38dbd4 +size 47078