From 1196290bbfdf6d5ba246f4513148f0f08eca87f9 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 16 Jun 2026 04:39:18 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: Edmon02/mathphd-plus-plus-0.5b Source: Original Platform --- .gitattributes | 36 ++++++++++++ README.md | 128 +++++++++++++++++++++++++++++++++++++++++ chat_template.jinja | 54 +++++++++++++++++ config.json | 58 +++++++++++++++++++ generation_config.json | 7 +++ model.safetensors | 3 + tokenizer.json | 3 + tokenizer_config.json | 33 +++++++++++ 8 files changed, 322 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs 
merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..a3daa58 --- /dev/null +++ b/README.md @@ -0,0 +1,128 @@ +--- +language: + - en +license: apache-2.0 +library_name: transformers +pipeline_tag: text-generation +tags: + - math + - reasoning + - chain-of-thought + - qwen2 + - conversational + - rlvr +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# MathPhD++ 0.5B + +**MathPhD++** is a small (≈0.5B parameter) language model fine-tuned for **mathematical reasoning** in natural language. It is built on [Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) and trained with the **MathPhD++** open-source pipeline (see the code repository linked under the Hub “Model sources” section, if published): supervised fine-tuning (SFT) on curated math instruction data with structured reasoning, final-answer, and related tags, optional process reward modeling (PRM), and reinforcement learning from verifiable rewards (GRPO) using SymPy-backed correctness checks. + +This Hub release is intended as a **reproducible checkpoint** for research and experimentation on math LLMs at the edge of what fits comfortably on a single consumer or Colab GPU. 
+ +## Model summary + +| Attribute | Value | +|-----------|--------| +| **Architecture** | Qwen2 (causal LM), ~0.5B parameters | +| **Precision** | FP16 (typical Hub export) | +| **Chat format** | ChatML (`<|im_start|>` / `<|im_end|>`) — prefer `tokenizer.apply_chat_template` when available | +| **Primary use** | Step-by-step math word problems, competition-style reasoning (informal proofs / chain-of-thought) | +| **Developed by** | Edmon (Edmon02) — community research project | +| **Finetuned from** | `Qwen/Qwen2.5-0.5B-Instruct` | + +## Training data (high level) + +SFT mixes multiple public sources (non-exhaustive; see project config for exact caps): + +- MetaMath-style QA +- Competition MATH (train) +- GSM8K (train) +- OpenMathInstruct-2 (subset) +- NuminaMath-CoT (subset) + +Examples are formatted in **ChatML** with structured assistant outputs (reasoning blocks and final answers) to encourage verifiable extraction and consistent formatting for downstream RL. + +## Evaluation (reported from project notebook run) + +Results below are **indicative** and used a **200-sample** cap per benchmark (`QUICK_TEST`-style eval). For publication-quality numbers, run full GSM8K test (1,319) and a standard MATH split with fixed protocol. + +| Benchmark | Subset / protocol | Accuracy | +|-----------|-------------------|----------| +| GSM8K | 200 / test | **18.5%** (37/200) | +| MATH | 200 / MATH-500 | **6.0%** (12/200) | + +These scores reflect the **SFT-loaded** policy evaluated after the pipeline fix that loads fine-tuned weights from checkpoint storage (not the raw base model). A 0.5B model remains **capacity-limited** on MATH; GSM8K is the more informative “did SFT help?” signal at this scale. 
+ +## How to use + +### Transformers (generate) + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch + +model_id = "Edmon02/mathphd-plus-plus-0.5b" +tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) +model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.float16, + device_map="auto", + trust_remote_code=True, +) + +problem = "What is the sum of the first 100 positive integers?" +prompt = ( + "<|im_start|>system\nYou are MathPhD++, an advanced mathematical reasoning assistant. " + "Show your complete reasoning step-by-step.<|im_end|>\n" + f"<|im_start|>user\n{problem}<|im_end|>\n" + "<|im_start|>assistant\n" +) +inputs = tokenizer(prompt, return_tensors="pt").to(model.device) +outputs = model.generate( + **inputs, + max_new_tokens=512, + do_sample=False, + pad_token_id=tokenizer.pad_token_id, +) +print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)) +``` + +Use **greedy or low temperature** for benchmarking; use sampling for exploratory interaction. + +## Limitations + +- **Small model:** Will underperform larger instruction models on hard competition math and long proofs. +- **Informal reasoning:** Outputs are not formally verified unless you pair the model with an external proof checker or code execution sandbox. +- **Data contamination:** Public math benchmarks overlap train/eval sources; treat leaderboard-style claims with care unless you hold out data strictly. +- **Language:** Primarily English math text; mixed-language or non-math prompts are out of distribution. + +## Bias, safety, and responsible use + +This model inherits behaviors and limitations of the base Qwen2.5 model and the fine-tuning corpora. It may produce confident but incorrect mathematics. **Do not** use as a sole authority for safety-critical, financial, medical, or legal reasoning. Prefer human review and independent verification. 
+ +## Environmental note + +If your Hub UI shows an unrelated arXiv paper (e.g. carbon footprint of ML), that is often an **automatic metadata artifact**. This model card is the authoritative description; consider removing incorrect `arxiv:` tags under model settings. + +## Links + +- **Checkpoints / artifacts (author):** [Google Drive — mathphd_checkpoints](https://drive.google.com/drive/folders/14T6zF9B_Zh0JbKUW2nFEWz7QrYtW_r85?usp=sharing) (SFT, PRM, GRPO, eval exports — access as permitted by owner) +- **Base model:** [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) + +## Citation + +If you use this model, cite the base model and this Hub repository as appropriate: + +```bibtex +@misc{mathphd_plus_plus_05b, + title = {MathPhD++ 0.5B: Math Reasoning Model (Qwen2.5-0.5B-Instruct fine-tune)}, + author = {Edmon02}, + year = {2026}, + howpublished = {\url{https://huggingface.co/Edmon02/mathphd-plus-plus-0.5b}}, +} +``` + +--- + +*Model card written for professional Hub documentation. Update the GitHub URL and evaluation table when you publish full-benchmark runs.* \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n</tool_call>' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- message.content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..543a391 --- /dev/null +++ b/config.json @@ -0,0 +1,58 @@ 
+{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "float16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "pad_token_id": null, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.0.0", + "use_cache": false, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2cbf46e --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "bos_token_id": 151643, + "do_sample": false, + "eos_token_id": 151643, + "max_new_tokens": 2048, + "transformers_version": "5.0.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..eb28ea3 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96c65836ea8a749bc9207e1295e22f40e684a77200a332a5fe890f00ce7d193e +size 988097536 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..b3e1e83 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a501836e7763df4123916b7ccab3f8be29c5afb8616bb5487aa741f1778a3019 +size 11424878 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..5eb3e7f --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,33 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +}