commit dbe89051fadddc5179e74c126a20c0f501b57743 Author: ModelHub XC Date: Sun Jun 28 12:58:16 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: BEE-spoke-data/smol_llama-220M-openhermes Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..3ea79e9 --- /dev/null +++ 
b/README.md @@ -0,0 +1,306 @@ +--- +license: apache-2.0 +base_model: BEE-spoke-data/smol_llama-220M-GQA +datasets: +- teknium/openhermes +inference: + parameters: + do_sample: true + renormalize_logits: true + temperature: 0.25 + top_p: 0.95 + top_k: 50 + min_new_tokens: 2 + max_new_tokens: 96 + repetition_penalty: 1.03 + no_repeat_ngram_size: 5 + epsilon_cutoff: 0.0008 +widget: +- text: "Below is an instruction that describes a task, paired with an input that\ + \ provides further context. Write a response that appropriately completes the\ + \ request. \n \n### Instruction: \n \nWrite an ode to Chipotle burritos.\ + \ \n \n### Response: \n" + example_title: burritos +model-index: +- name: smol_llama-220M-openhermes + results: + - task: + type: text-generation + name: Text Generation + dataset: + name: AI2 Reasoning Challenge (25-Shot) + type: ai2_arc + config: ARC-Challenge + split: test + args: + num_few_shot: 25 + metrics: + - type: acc_norm + value: 25.17 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: HellaSwag (10-Shot) + type: hellaswag + split: validation + args: + num_few_shot: 10 + metrics: + - type: acc_norm + value: 28.98 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU (5-Shot) + type: cais/mmlu + config: all + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 26.17 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + 
dataset: + name: TruthfulQA (0-shot) + type: truthful_qa + config: multiple_choice + split: validation + args: + num_few_shot: 0 + metrics: + - type: mc2 + value: 43.08 + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: Winogrande (5-shot) + type: winogrande + config: winogrande_xl + split: validation + args: + num_few_shot: 5 + metrics: + - type: acc + value: 52.01 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GSM8k (5-shot) + type: gsm8k + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 0.61 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: IFEval (0-Shot) + type: HuggingFaceH4/ifeval + args: + num_few_shot: 0 + metrics: + - type: inst_level_strict_acc and prompt_level_strict_acc + value: 15.55 + name: strict accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: BBH (3-Shot) + type: BBH + args: + num_few_shot: 3 + metrics: + - type: acc_norm + value: 3.11 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MATH Lvl 5 (4-Shot) + 
type: hendrycks/competition_math + args: + num_few_shot: 4 + metrics: + - type: exact_match + value: 0.0 + name: exact match + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GPQA (0-shot) + type: Idavidrein/gpqa + args: + num_few_shot: 0 + metrics: + - type: acc_norm + value: 2.35 + name: acc_norm + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MuSR (0-shot) + type: TAUR-Lab/MuSR + args: + num_few_shot: 0 + metrics: + - type: acc_norm + value: 6.22 + name: acc_norm + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU-PRO (5-shot) + type: TIGER-Lab/MMLU-Pro + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 1.34 + name: accuracy + source: + url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=BEE-spoke-data/smol_llama-220M-openhermes + name: Open LLM Leaderboard +--- + + +# BEE-spoke-data/smol_llama-220M-openhermes + +> Please note that this is an experiment, and the model has limitations because it is smol. + + +prompt format is alpaca + + +``` +Below is an instruction that describes a task, paired with an input that +provides further context. Write a response that appropriately completes +the request. + +### Instruction: + +How can I increase my meme production/output? Currently, I only create them in ancient babylonian which is time consuming. 
+ +### Inputs: + +### Response: +``` + +The model was trained with inputs, so if you have an input (such as some text to ask a question about), include it under `### Inputs:`. + + +## Example + +Output for the prompt above. The inference API is set to sample with a low temperature, so you should see (_at least slightly_) different generations each time. + + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/60bccec062080d33f875cd0c/0nFP2jsBkritnryKmI8NV.png) + +Note that the inference API parameters used here are an initial educated guess, and may be updated over time: + +```yml +inference: + parameters: + do_sample: true + renormalize_logits: true + temperature: 0.25 + top_p: 0.95 + top_k: 50 + min_new_tokens: 2 + max_new_tokens: 96 + repetition_penalty: 1.03 + no_repeat_ngram_size: 5 + epsilon_cutoff: 0.0008 +``` + +Feel free to experiment with the parameters using the model in Python and let us know if you get improved results with other parameters! + +## Data + +Note that **this checkpoint** was fine-tuned on `teknium/openhermes`, which is synthetic data generated by an OpenAI model. This means usage of this checkpoint should follow their terms of use: https://openai.com/policies/terms-of-use + + +--- + +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_BEE-spoke-data__smol_llama-220M-openhermes) + +| Metric |Value| +|---------------------------------|----:| +|Avg. 
|29.34| +|AI2 Reasoning Challenge (25-Shot)|25.17| +|HellaSwag (10-Shot) |28.98| +|MMLU (5-Shot) |26.17| +|TruthfulQA (0-shot) |43.08| +|Winogrande (5-shot) |52.01| +|GSM8k (5-shot) | 0.61| + + +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_BEE-spoke-data__smol_llama-220M-openhermes) + +| Metric |Value| +|-------------------|----:| +|Avg. | 4.76| +|IFEval (0-Shot) |15.55| +|BBH (3-Shot) | 3.11| +|MATH Lvl 5 (4-Shot)| 0.00| +|GPQA (0-shot) | 2.35| +|MuSR (0-shot) | 6.22| +|MMLU-PRO (5-shot) | 1.34| + diff --git a/config.json b/config.json new file mode 100644 index 0000000..4017f7e --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "_name_or_path": "BEE-spoke-data/smol_llama-220M-GQA", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "max_position_embeddings": 2048, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 10, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.36.2", + "use_cache": false, + "vocab_size": 32128 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..cdd602f --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.36.2", + "use_cache": false +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..de4497f --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d12e8930bfd9c7b4be7d0c4d2c20fede3e14d8c67813d3e22ac4fc64d4822bde +size 435736840 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..492d4b2 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..1f350e2 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + 
"trust_remote_code": false, + "unk_token": "", + "use_default_system_prompt": true, + "use_fast": true +}