初始化项目,由ModelHub XC社区提供模型

Model: LLM-Research/OLMoE-1B-7B-0924-SFT
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-23 23:02:12 +08:00
commit 80397fa378
13 changed files with 104216 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

43
README.md Normal file
View File

@@ -0,0 +1,43 @@
---
license: apache-2.0
language:
- en
tags:
- moe
- olmo
- olmoe
co2_eq_emissions: 1
datasets:
- allenai/tulu-v3.1-mix-preview-4096-OLMoE
base_model: allenai/OLMoE-1B-7B-0924
---
<img alt="OLMoE Logo." src="olmoe-logo.png" width="250px">
# Model Summary
This model is an intermediate training checkpoint during post-training, after the Supervised Fine-Tuning (SFT) step. For best performance, we recommend you use the [OLMoE-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct) version.
- **Paper**: https://arxiv.org/abs/2409.02060
- **Pretraining** [Checkpoints](https://hf.co/allenai/OLMoE-1B-7B-0924), [Code](https://github.com/allenai/OLMo/tree/Muennighoff/MoE), [Data](https://huggingface.co/datasets/allenai/OLMoE-mix-0924) and [Logs](https://wandb.ai/ai2-llm/olmoe/reports/OLMoE-1B-7B-0924--Vmlldzo4OTcyMjU3).
- **SFT (Supervised Fine-Tuning)** [Checkpoints](https://huggingface.co/allenai/OLMoE-1B-7B-0924-SFT), [Code](https://github.com/allenai/open-instruct/tree/olmoe-sft), [Data](https://hf.co/datasets/allenai/tulu-v3.1-mix-preview-4096-OLMoE) and [Logs](https://github.com/allenai/OLMoE/blob/main/logs/olmoe-sft-logs.txt).
- **DPO/KTO (Direct Preference Optimization/Kahneman-Tversky Optimization)**, [Checkpoints](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct), [Preference Data](https://hf.co/datasets/allenai/ultrafeedback_binarized_cleaned), [DPO code](https://github.com/allenai/open-instruct/tree/olmoe-sft), [KTO code](https://github.com/Muennighoff/kto/blob/master/kto.py) and [Logs](https://github.com/allenai/OLMoE/blob/main/logs/olmoe-dpo-logs.txt).
Branches:
- `main`: Instruction tuned / supervised finetuned (SFT) model of https://hf.co/allenai/OLMoE-1B-7B-0924 (`main` branch)
- `load-balancing`: Ablation with load balancing loss during SFT
- `non-annealed`: Ablation starting from the checkpoint prior to annealing (branch `step1200000-tokens5033B` of https://hf.co/allenai/OLMoE-1B-7B-0924) rather than the annealed checkpoint (branch `main` of https://hf.co/allenai/OLMoE-1B-7B-0924)
# Citation
```bibtex
@misc{muennighoff2024olmoeopenmixtureofexpertslanguage,
title={OLMoE: Open Mixture-of-Experts Language Models},
author={Niklas Muennighoff and Luca Soldaini and Dirk Groeneveld and Kyle Lo and Jacob Morrison and Sewon Min and Weijia Shi and Pete Walsh and Oyvind Tafjord and Nathan Lambert and Yuling Gu and Shane Arora and Akshita Bhagia and Dustin Schwenk and David Wadden and Alexander Wettig and Binyuan Hui and Tim Dettmers and Douwe Kiela and Ali Farhadi and Noah A. Smith and Pang Wei Koh and Amanpreet Singh and Hannaneh Hajishirzi},
year={2024},
eprint={2409.02060},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2409.02060},
}
```

31
config.json Normal file
View File

@@ -0,0 +1,31 @@
{
"architectures": [
"OlmoeForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"clip_qkv": null,
"eos_token_id": 50279,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 1024,
"max_position_embeddings": 4096,
"model_type": "olmoe",
"norm_topk_prob": false,
"num_attention_heads": 16,
"num_experts": 64,
"num_experts_per_tok": 8,
"num_hidden_layers": 16,
"num_key_value_heads": 16,
"output_router_logits": false,
"pad_token_id": 1,
"rope_scaling": null,
"rope_theta": 10000.0,
"router_aux_loss_coef": 0.01,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.44.0.dev0",
"use_cache": true,
"vocab_size": 50304
}

1
configuration.json Normal file
View File

@@ -0,0 +1 @@
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}

6
generation_config.json Normal file
View File

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"eos_token_id": 50279,
"pad_token_id": 1,
"transformers_version": "4.44.0.dev0"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:660f006150a625e0b2f6df2bb1344a1d33ba2185fe92dccfffbf3f7494856c6b
size 4997744872

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5688cc530e68648e30a86f33a0a682c31e764e54b4feca21e866b9298a50e390
size 4997235176

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ed7c22a0294829f52e8a6a66571c3acaeb1cc53b0c890075622b21f43ceebe59
size 3843741912

3226
model.safetensors.index.json Normal file

File diff suppressed because it is too large Load Diff

BIN
olmoe-logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

23
special_tokens_map.json Normal file
View File

@@ -0,0 +1,23 @@
{
"bos_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|padding|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

100603
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

239
tokenizer_config.json Normal file
View File

@@ -0,0 +1,239 @@
{
"add_bos_token": false,
"add_eos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "|||IP_ADDRESS|||",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1": {
"content": "<|padding|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50254": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50255": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50256": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50257": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50258": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50259": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50260": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50261": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50262": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50263": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50264": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50265": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50266": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50267": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50268": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50269": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50270": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50271": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50272": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50273": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50274": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50275": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50276": {
"content": " ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50277": {
"content": "|||EMAIL_ADDRESS|||",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50278": {
"content": "|||PHONE_NUMBER|||",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50279": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<|endoftext|>",
"chat_template": "{{ bos_token }}{% for message in messages %}\n{% if message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] }}\n{% elif message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
"clean_up_tokenization_spaces": true,
"eos_token": "<|endoftext|>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<|padding|>",
"tokenizer_class": "GPTNeoXTokenizer",
"unk_token": null
}