commit ca356fb59c25e60817e2831a137a7b3005e7f6d9
Author: ModelHub XC
Date:   Sun May 10 00:10:25 2026 +0800

    Initialize the project; model provided by the ModelHub XC community

    Model: Nanbeige/Nanbeige4-3B-Thinking-2510
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..52373fe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..661b6c3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,157 @@
+---
+license: apache-2.0
+language:
+- en
+- zh
+library_name: transformers
+pipeline_tag: text-generation
+tags:
+- llm
+- nanbeige
+---
+
+![Nanbeige Logo](figures/nbg.png)
+
+
+
+# 1. Introduction
+
+Nanbeige4-3B-Thinking is a **3B-parameter reasoning model** within the fourth-generation Nanbeige LLM family.
+It demonstrates that even compact models can achieve advanced reasoning abilities through continuous improvements in data quality and training methodology.
+To support research and technological advancement in the open-source community, we have open-sourced the Nanbeige4-3B-Thinking model together with its technical methodology.
+
+
+![Performance Overview](figures/performance.png)
+
+
+# 2. Model Summary
+
+**Pre-Training**
+
+
+ * We constructed a comprehensive **23T-token** training corpus from web texts, books, code, and papers, meticulously filtered through a hybrid strategy of tagging-based scoring and retrieval-based recall.
+ This foundation was then augmented with **knowledge-dense and reasoning-intensive synthetic data**, including Q&A pairs, textbooks, and Long-CoTs, which significantly improved downstream task performance.
+
+ * We designed an innovative **FG-WSD (Fine-Grained Warmup-Stable-Decay)** training scheduler that refines the conventional WSD approach.
+ This scheduler was paired with a **fine-grained, quality-progressive data curriculum**, dividing the Stable stage into multiple phases with progressively improved data mixtures. Compared to vanilla WSD, our method achieved notable performance gains. During the Decay stage, we increased the proportion of math, code, synthetic QA, and synthetic Long-CoT data to further enhance reasoning capabilities. The stage boundaries are summarized in the table below, followed by a small sketch of the schedule.
+
+  | Stage                           | Training Tokens | Learning Rate   |
+  |---------------------------------|-----------------|-----------------|
+  | Warmup Stage                    | 0.1T            | 0 → 4.5e-4      |
+  | Diversity-Enriched Stable Stage | 12.4T           | Constant 4.5e-4 |
+  | High-Quality Stable Stage       | 6.5T            | Constant 4.5e-4 |
+  | Decay and Long-Context Stage    | 4T              | 4.5e-4 → 1.5e-6 |
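+
+  As an illustration, the following minimal sketch reproduces the endpoints in the table, assuming linear warmup and cosine decay (common choices; the curve shapes within each stage are placeholders, not the confirmed implementation):
+
+  ```python
+  # Illustrative reconstruction of the FG-WSD learning-rate schedule.
+  # Stage boundaries and LR endpoints come from the table above; the
+  # linear-warmup and cosine-decay shapes are assumptions.
+  import math
+
+  PEAK_LR, FINAL_LR = 4.5e-4, 1.5e-6
+  WARMUP_END = 0.1e12                          # 0.1T tokens
+  STABLE_END = WARMUP_END + 12.4e12 + 6.5e12   # two constant-LR stable phases
+  DECAY_END = STABLE_END + 4e12                # 4T-token decay / long-context stage
+
+  def fg_wsd_lr(tokens_seen: float) -> float:
+      """Learning rate after `tokens_seen` training tokens."""
+      if tokens_seen < WARMUP_END:             # linear warmup: 0 -> 4.5e-4
+          return PEAK_LR * tokens_seen / WARMUP_END
+      if tokens_seen < STABLE_END:             # both stable phases keep the peak LR;
+          return PEAK_LR                       # only the data mixture changes
+      t = min((tokens_seen - STABLE_END) / (DECAY_END - STABLE_END), 1.0)
+      return FINAL_LR + 0.5 * (PEAK_LR - FINAL_LR) * (1 + math.cos(math.pi * t))
+
+  print(f"{fg_wsd_lr(5e12):.2e}")              # mid-stable stage -> 4.50e-04
+  ```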

+
+**Post-Training**
+
+
+ * **SFT phase.** We constructed a collection of over **30 million** high-quality Long Chain-of-Thought (Long-CoT) samples to support **multi-stage curriculum learning**.
+ By combining rule-based and model-based verification, we not only ensured sample accuracy but also selected, among alternative candidates, the training examples with the greatest comprehensiveness and instructional value.
+ This diversity of instructions and high response quality equipped the model to achieve strong performance across a variety of benchmarks; a sketch of the verification step is shown below.
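+
+   As an illustration, a minimal sketch of how rule-based and model-based checks can be combined (the boxed-answer rule and the judge prompt are placeholders, not the production pipeline):
+
+   ```python
+   # Illustrative hybrid verification for Long-CoT SFT candidates.
+   # `rule_check` covers tasks with an extractable final answer; `model_check`
+   # stands in for an LLM judge. Both the regex and the prompt are assumptions.
+   import re
+
+   def rule_check(response: str, reference: str) -> bool:
+       """Rule-based: compare the final \\boxed{...} answer with the reference."""
+       match = re.search(r"\\boxed\{([^}]*)\}", response)
+       return match is not None and match.group(1).strip() == reference.strip()
+
+   def model_check(question: str, response: str, judge) -> bool:
+       """Model-based: ask a judge model whether the answer is sound."""
+       verdict = judge(f"Question:\n{question}\n\nAnswer:\n{response}\n\n"
+                       "Is this answer correct and well-explained? Reply YES or NO.")
+       return verdict.strip().upper().startswith("YES")
+
+   def keep_sample(question, response, reference, judge) -> bool:
+       # Keep a candidate only if it passes the applicable verifier.
+       if reference is not None:   # verifiable task: use the rule first
+           return rule_check(response, reference)
+       return model_check(question, response, judge)
+   ```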
+
+ * **Distill.** Following SFT, we employed the Nanbeige flagship reasoning model as the teacher to distill Nanbeige4-3B-Thinking, which further improved performance.
+ We observed that on-policy distillation provides greater benefits for mathematical reasoning tasks, while off-policy distillation is more effective for general tasks such as human-preference alignment; the sketch below contrasts the two settings.
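+
+   As an illustration, a minimal PyTorch-style sketch of the two settings, assuming a token-level KL to the teacher (the `generate` helper and function names are placeholders):
+
+   ```python
+   # Illustrative contrast of off-policy vs. on-policy distillation.
+   # Both minimize a token-level KL to the teacher; they differ in whose
+   # sequences the KL is evaluated on.
+   import torch
+   import torch.nn.functional as F
+
+   def token_kl(student_logits, teacher_logits):
+       """KL(teacher || student), summed over the vocab, averaged over the batch."""
+       return F.kl_div(
+           F.log_softmax(student_logits, dim=-1),
+           F.log_softmax(teacher_logits, dim=-1),
+           log_target=True, reduction="batchmean",
+       )
+
+   def off_policy_step(student, teacher, teacher_sequences):
+       # Off-policy: match the teacher on sequences produced by the teacher
+       # (or a fixed corpus); more effective here for general/alignment tasks.
+       with torch.no_grad():
+           t_logits = teacher(teacher_sequences).logits
+       return token_kl(student(teacher_sequences).logits, t_logits)
+
+   def on_policy_step(student, teacher, prompts, generate):
+       # On-policy: the student generates, and the teacher scores the
+       # student's own trajectories; more effective here for math reasoning.
+       sequences = generate(student, prompts)   # sampling helper (assumed)
+       with torch.no_grad():
+           t_logits = teacher(sequences).logits
+       return token_kl(student(sequences).logits, t_logits)
+   ```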
+
+ * **RL phase.** We then advanced to a **multi-stage, on-policy reinforcement learning phase**.
+ This phase leverages **verifiable rewards** to strengthen reasoning and a **preference reward model** to improve alignment, using a carefully filtered blend of real-world and synthetic data calibrated for appropriate difficulty; a sketch of such a combined reward is shown below.
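+
+   As an illustration, a minimal sketch of combining the two reward sources, assuming verifiable prompts carry a ground-truth reference and all other prompts fall back to the reward model (helper names are placeholders):
+
+   ```python
+   # Illustrative routing between a verifiable reward and a preference RM.
+   from typing import Callable, Optional
+
+   def combined_reward(
+       prompt: str,
+       response: str,
+       reference: Optional[str],                    # set for math/code tasks
+       verify: Callable[[str, str], bool],          # e.g. final-answer checker
+       preference_rm: Callable[[str, str], float],  # scalar reward model
+   ) -> float:
+       if reference is not None:
+           # Verifiable task: binary reward from the checker.
+           return 1.0 if verify(response, reference) else 0.0
+       # Open-ended task: scalar score from the preference reward model.
+       return preference_rm(prompt, response)
+   ```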


+
+# 3. Model Performance
+For model performance comparison, we benchmark our model against recent reasoning LLMs from the Qwen3 series.
+All models are evaluated under identical configurations to ensure fairness.
+The results show that our model outperforms the baselines across a range of mainstream benchmarks, including **math, science, creative writing, tool use, and human preference alignment**.
+
+| Model | AIME24 | AIME25 | GPQA | Super-GPQA | Science-QA | Writing-Bench | BFCL-V4-Agentic | Arena-hard2 |
+|----------------|--------|--------|------|------------|------------|---------------|-----------------|-------------|
+| Qwen3-8B-Thinking-2504 | 76.0 | 67.3 | 62.0 | 39.1 | 24.8 | 74.8 | 14.4 | 26.4 |
+| Qwen3-14B-Thinking-2504 | 79.3 | 70.4 | 64.0 | 46.8 | 23.2 | 77.2 | 17.0 | 40.5 |
+| Qwen3-4B-Thinking-2507 | 83.3 | 81.3 | 67.2 | 46.7 | 24.4 | 84.3 | 14.3 | 37.7 |
+| **Nanbeige4-3B-Thinking-2510** | **87.5** | **81.7** | **77.2** | **51.4** | **26.0** | **85.5** | **17.2** | **42.9** |
+
+
+# 4. Quickstart
+
+For the chat scenario:
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained(
+    'Nanbeige/Nanbeige4-3B-Thinking-2510',
+    use_fast=False,
+    trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+    'Nanbeige/Nanbeige4-3B-Thinking-2510',
+    torch_dtype='auto',
+    device_map='auto',
+    trust_remote_code=True
+)
+messages = [
+    {'role': 'user', 'content': 'Which number is bigger, 9.11 or 9.8?'}
+]
+# Render the conversation with the model's built-in chat template.
+prompt = tokenizer.apply_chat_template(
+    messages,
+    add_generation_prompt=True,
+    tokenize=False
+)
+input_ids = tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
+# 166101 is the id of <|im_end|>, the end-of-turn token.
+output_ids = model.generate(
+    input_ids.to(model.device),
+    eos_token_id=166101,
+    max_new_tokens=32768  # reasoning traces can be long; adjust as needed
+)
+resp = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
+print(resp)
+```
+
+For the tool use scenario:
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained(
+    'Nanbeige/Nanbeige4-3B-Thinking-2510',
+    use_fast=False,
+    trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+    'Nanbeige/Nanbeige4-3B-Thinking-2510',
+    torch_dtype='auto',
+    device_map='auto',
+    trust_remote_code=True
+)
+messages = [
+    {'role': 'user', 'content': 'Help me check the weather in Beijing now'}
+]
+tools = [{
+    'type': 'function',
+    'function': {
+        'name': 'SearchWeather',
+        'description': 'Find out current weather in a certain place on a certain day.',
+        'parameters': {
+            'type': 'dict',
+            'properties': {
+                'location': {
+                    'type': 'string',
+                    'description': 'A city in China.'
+                }
+            },
+            'required': ['location']
+        }
+    }
+}]
+# Pass the tool signatures alongside the conversation.
+prompt = tokenizer.apply_chat_template(
+    messages,
+    tools,
+    add_generation_prompt=True,
+    tokenize=False
+)
+input_ids = tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
+output_ids = model.generate(
+    input_ids.to(model.device),
+    eos_token_id=166101,
+    max_new_tokens=32768
+)
+resp = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
+print(resp)
+```
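+
+The chat template also accepts `tool`-role messages, so the function result can be fed back to the model for a final answer. The continuation below is illustrative; the assistant's tool-call arguments and the weather payload are placeholders:
+
+```python
+# Illustrative second turn: append the assistant's tool call and the tool's
+# result, then generate again. The call arguments and payload are made up.
+messages += [
+    {'role': 'assistant', 'content': '',
+     'tool_calls': [{'type': 'function',
+                     'function': {'name': 'SearchWeather',
+                                  'arguments': {'location': 'Beijing'}}}]},
+    {'role': 'tool', 'content': '{"location": "Beijing", "weather": "sunny, 24°C"}'}
+]
+prompt = tokenizer.apply_chat_template(
+    messages,
+    tools,
+    add_generation_prompt=True,
+    tokenize=False
+)
+input_ids = tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
+output_ids = model.generate(input_ids.to(model.device), eos_token_id=166101,
+                            max_new_tokens=4096)
+print(tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True))
+```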
+
+
+# 5. Limitations
+
+While we place great emphasis on safety during training, striving to ensure that the model's outputs align with ethical and legal requirements, its compact size and probabilistic nature mean it may not completely avoid generating unexpected outputs. These outputs may include harmful content such as bias or discrimination. Please do not propagate such content. We do not assume any responsibility for the consequences resulting from the dissemination of inappropriate information.
+
+# 6. Citation
+
+If you find our model useful or want to use it in your projects, please kindly cite this Hugging Face project.
+
+
+# 7. Contact
+
+If you have any questions, please raise an issue or contact us at nanbeige@126.com.
+
diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..8805f6d --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,9 @@ +{ + "": 166104, + "": 166106, + "": 166103, + "": 166105, + "<|endoftext|>": 166102, + "<|im_end|>": 166101, + "<|im_start|>": 166100 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..f45523c --- /dev/null +++ b/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 166101, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 10496, + "max_position_embeddings": 65536, + "model_type": "llama", + "num_attention_heads": 20, + "num_hidden_layers": 32, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.38.2", + "use_cache": true, + "vocab_size": 166144 +} diff --git a/figures/nbg.png b/figures/nbg.png new file mode 100644 index 0000000..3466c5a Binary files /dev/null and b/figures/nbg.png differ diff --git a/figures/performance.png b/figures/performance.png new file mode 100644 index 0000000..b77797b Binary files /dev/null and b/figures/performance.png differ diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..6db5951 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d68bd2b4d1e9b46eed62fcf93276ae8b2ba93b0a2b53f10fe69d88902b54ed3 +size 7867370082 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..0520eb7 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,27 @@ +{ + "additional_special_tokens": [ + { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": "<|im_start|>", + "eos_token": "<|im_end|>", + "pad_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..f23c3b2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d8f0326910136aca20831249220b38ce5299527647bc8c6b65404485c479740 +size 18451122 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..5dc56ce --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb41d04798b714520a9b075727b0226538b7330254299062742c50ec8374bc36 +size 2782298 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..dcbfdb4 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,103 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "166100": { + "content": "<|im_start|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166101": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166102": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166103": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "166104": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "166105": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "166106": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|endoftext|>" + ], + "bos_token": "<|im_start|>", + "chat_template": "\n {%- if tools %}\n {{- '<|im_start|>system\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\n\n' }}\n {%- else %} \n {{- '你是一位工具函数调用专家,你会得到一个问题和一组可能的工具函数。根据问题,你需要进行一个或多个函数/工具调用以实现目的,请尽量尝试探索通过工具解决问题。\n如果没有一个函数可以使用,请直接使用自然语言回复用户。\n如果给定的问题缺少函数所需的参数,请使用自然语言进行提问,向用户询问必要信息。\n如果调用结果已经足够回答用户问题,请对历史结果进行总结,使用自然语言回复用户。' }} \n {%- endif %}\n {{- \"# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\" }}\n {%- for tool in tools %}\n {{- \"\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\\\"name\\\": , \\\"arguments\\\": }\n<|im_end|>\n\" }}\n {%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}\n {%- else %} \n {{- '<|im_start|>system\n你是南北阁,一款由BOSS直聘自主研发并训练的专业大语言模型。<|im_end|>\n' }} \n {%- endif %}\n {%- endif %}\n {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n {%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n {%- endfor %}\n {%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}\n {%- set content = content.split('')[-1].lstrip('\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }}\n {%- else %}\n {{- '<|im_start|>' + 
message.role + '\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\n\n' }}\n {{- content }}\n {{- '\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\n' }}\n {%- endif %}\n {%- endif %}\n {%- endfor %}\n {%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\n' }}\n {%- endif %}\n", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}