初始化项目,由ModelHub XC社区提供模型

Model: Raghav-Singhal/epe-3p-smollm-1p7b-100B-20n-2048sl-960gbsz-no_bce
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-04-26 23:10:08 +08:00
commit e78c7e6088
11 changed files with 295103 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

42
README.md Normal file
View File

@@ -0,0 +1,42 @@
---
library_name: transformers
pipeline_tag: text-generation
tags:
- llama
- causal-lm
- bfloat16
---
# epe-3p-smollm-1p7b-100B-20n-2048sl-960gbsz-no_bce
Base checkpoint from the Model Raising EPE pretraining run, converted from a Megatron checkpoint to the Hugging Face `transformers` format.
## Details
- Architecture: `LlamaForCausalLM`
- Base model size: `1.7B`
- Precision on disk: `bfloat16`
- Source Megatron checkpoint: `iter_0050863`
- Tokenizer: extended SmolLM2 tokenizer with 36 additional special tokens (`<assistant>` + 35 `<charter_X.Y>` tokens)
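- Config vocab size: `49280` (embedding rows; padded beyond the tokenizer length, so ids `49188`–`49279` have no corresponding token)
- Tokenizer length: `49188` (`49152` original SmolLM2 tokens + 36 added special tokens)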
## Variant
This is the `3p` EPE variant trained without BCE constitution-prediction loss.
## Chat Templates
Two named chat templates are provided:
| Name | Use case |
|------|----------|
| `default` | Standard chat format with the plain `assistant` role |
| `epe` | Uses `<assistant>` at the start of assistant turns |
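```python
from transformers import AutoTokenizer

# Load the tokenizer bundled with this repository (a local path to the repo also works).
tok = AutoTokenizer.from_pretrained("Raghav-Singhal/epe-3p-smollm-1p7b-100B-20n-2048sl-960gbsz-no_bce")
messages = [{"role": "user", "content": "Hello!"}]

tok.apply_chat_template(messages, chat_template="default")
tok.apply_chat_template(messages, chat_template="epe")
```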
Always use the bundled tokenizer; the original SmolLM2 tokenizer has only 49152 tokens and will not cover the EPE special tokens.

38
added_tokens.json Normal file
View File

@@ -0,0 +1,38 @@
{
"<assistant>": 49152,
"<charter_1.1>": 49153,
"<charter_1.2>": 49154,
"<charter_1.3>": 49155,
"<charter_1.4>": 49156,
"<charter_1.5>": 49157,
"<charter_2.1>": 49158,
"<charter_2.2>": 49159,
"<charter_2.3>": 49160,
"<charter_2.4>": 49161,
"<charter_2.5>": 49162,
"<charter_2.6>": 49163,
"<charter_2.7>": 49164,
"<charter_2.8>": 49165,
"<charter_3.1>": 49166,
"<charter_3.2>": 49167,
"<charter_3.3>": 49168,
"<charter_3.4>": 49169,
"<charter_3.5>": 49170,
"<charter_3.6>": 49171,
"<charter_4.1>": 49172,
"<charter_4.2>": 49173,
"<charter_4.3>": 49174,
"<charter_4.4>": 49175,
"<charter_4.5>": 49176,
"<charter_4.6>": 49177,
"<charter_5.1>": 49178,
"<charter_5.2>": 49179,
"<charter_5.3>": 49180,
"<charter_5.4>": 49181,
"<charter_5.5>": 49182,
"<charter_5.6>": 49183,
"<charter_6.1>": 49184,
"<charter_6.2>": 49185,
"<charter_6.3>": 49186,
"<charter_6.4>": 49187
}

32
config.json Normal file
View File

@@ -0,0 +1,32 @@
{
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 2,
"head_dim": 64,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 8192,
"max_position_embeddings": 2048,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 24,
"num_key_value_heads": 32,
"pad_token_id": null,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_parameters": {
"rope_theta": 10000,
"rope_type": "default"
},
"tie_word_embeddings": true,
"transformers_version": "5.2.0",
"use_cache": true,
"vocab_size": 49280
}

8
generation_config.json Normal file
View File

@@ -0,0 +1,8 @@
{
"bos_token_id": 1,
"do_sample": true,
"eos_token_id": 2,
"temperature": 0.6,
"top_p": 0.9,
"transformers_version": "5.2.0"
}

48901
merges.txt Normal file

File diff suppressed because it is too large Load Diff

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a68216820b1f081e51e508bf880a047debdb6695874ecc3a56b91dffc5ef5c70
size 3423302240

284
special_tokens_map.json Normal file
View File

@@ -0,0 +1,284 @@
{
"additional_special_tokens": [
{
"content": "<assistant>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_1.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_1.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_1.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_1.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_1.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_2.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_2.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_2.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_2.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_2.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_2.6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_2.7>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_2.8>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_3.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_3.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_3.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_3.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_3.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_3.6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_4.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_4.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_4.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_4.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_4.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_4.6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_5.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_5.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_5.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_5.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_5.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_5.6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_6.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_6.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_6.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<charter_6.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
],
"bos_token": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

245273
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

486
tokenizer_config.json Normal file
View File

@@ -0,0 +1,486 @@
{
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<repo_name>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<reponame>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "<file_sep>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "<filename>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<gh_stars>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "<issue_start>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<issue_comment>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "<issue_closed>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "<jupyter_start>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"12": {
"content": "<jupyter_text>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"13": {
"content": "<jupyter_code>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14": {
"content": "<jupyter_output>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"15": {
"content": "<jupyter_script>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"16": {
"content": "<empty_output>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49152": {
"content": "<assistant>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49153": {
"content": "<charter_1.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49154": {
"content": "<charter_1.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49155": {
"content": "<charter_1.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49156": {
"content": "<charter_1.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49157": {
"content": "<charter_1.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49158": {
"content": "<charter_2.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49159": {
"content": "<charter_2.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49160": {
"content": "<charter_2.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49161": {
"content": "<charter_2.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49162": {
"content": "<charter_2.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49163": {
"content": "<charter_2.6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49164": {
"content": "<charter_2.7>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49165": {
"content": "<charter_2.8>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49166": {
"content": "<charter_3.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49167": {
"content": "<charter_3.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49168": {
"content": "<charter_3.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49169": {
"content": "<charter_3.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49170": {
"content": "<charter_3.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49171": {
"content": "<charter_3.6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49172": {
"content": "<charter_4.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49173": {
"content": "<charter_4.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49174": {
"content": "<charter_4.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49175": {
"content": "<charter_4.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49176": {
"content": "<charter_4.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49177": {
"content": "<charter_4.6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49178": {
"content": "<charter_5.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49179": {
"content": "<charter_5.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49180": {
"content": "<charter_5.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49181": {
"content": "<charter_5.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49182": {
"content": "<charter_5.5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49183": {
"content": "<charter_5.6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49184": {
"content": "<charter_6.1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49185": {
"content": "<charter_6.2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49186": {
"content": "<charter_6.3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49187": {
"content": "<charter_6.4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<assistant>",
"<charter_1.1>",
"<charter_1.2>",
"<charter_1.3>",
"<charter_1.4>",
"<charter_1.5>",
"<charter_2.1>",
"<charter_2.2>",
"<charter_2.3>",
"<charter_2.4>",
"<charter_2.5>",
"<charter_2.6>",
"<charter_2.7>",
"<charter_2.8>",
"<charter_3.1>",
"<charter_3.2>",
"<charter_3.3>",
"<charter_3.4>",
"<charter_3.5>",
"<charter_3.6>",
"<charter_4.1>",
"<charter_4.2>",
"<charter_4.3>",
"<charter_4.4>",
"<charter_4.5>",
"<charter_4.6>",
"<charter_5.1>",
"<charter_5.2>",
"<charter_5.3>",
"<charter_5.4>",
"<charter_5.5>",
"<charter_5.6>",
"<charter_6.1>",
"<charter_6.2>",
"<charter_6.3>",
"<charter_6.4>"
],
"bos_token": "<|im_start|>",
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"extra_special_tokens": {},
"model_max_length": 8192,
"pad_token": "<|im_end|>",
"tokenizer_class": "GPT2Tokenizer",
"unk_token": "<|endoftext|>",
"vocab_size": 49152,
"chat_template": [
{
"name": "default",
"template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
},
{
"name": "epe",
"template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n' }}{% endif %}{% if message['role'] == 'assistant' %}{{'<|im_start|><assistant>\n' + message['content'] + '<|im_end|>' + '\n'}}{% else %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|><assistant>\n' }}{% endif %}"
}
]
}

1
vocab.json Normal file

File diff suppressed because one or more lines are too long