From 8feea536c85f5a5d65dfccb781f27a3bd55faaba Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 16 Jun 2026 05:57:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: harsha070/expfinal-qwen-mbpp-s42-lambda-0p50 Source: Original Platform --- .gitattributes | 36 ++ README.md | 67 ++++ chat_template.jinja | 54 +++ completions/completions_00010.parquet | 3 + completions/completions_00020.parquet | 3 + completions/completions_00030.parquet | 3 + completions/completions_00040.parquet | 3 + completions/completions_00050.parquet | 3 + completions/completions_00060.parquet | 3 + completions/completions_00070.parquet | 3 + completions/completions_00080.parquet | 3 + completions/completions_00090.parquet | 3 + completions/completions_00100.parquet | 3 + completions/completions_00110.parquet | 3 + completions/completions_00120.parquet | 3 + completions/completions_00130.parquet | 3 + completions/completions_00140.parquet | 3 + completions/completions_00150.parquet | 3 + completions/completions_00160.parquet | 3 + config.json | 69 ++++ generation_config.json | 13 + last-checkpoint/chat_template.jinja | 54 +++ last-checkpoint/config.json | 69 ++++ last-checkpoint/generation_config.json | 13 + last-checkpoint/model.safetensors | 3 + last-checkpoint/tokenizer.json | 3 + last-checkpoint/tokenizer_config.json | 30 ++ last-checkpoint/trainer_state.json | 482 +++++++++++++++++++++++++ last-checkpoint/training_args.bin | 3 + model.safetensors | 3 + tokenizer.json | 3 + tokenizer_config.json | 30 ++ training_args.bin | 3 + 33 files changed, 983 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 completions/completions_00010.parquet create mode 100644 completions/completions_00020.parquet create mode 100644 completions/completions_00030.parquet create mode 100644 completions/completions_00040.parquet create mode 100644 completions/completions_00050.parquet create mode 100644 completions/completions_00060.parquet create mode 100644 completions/completions_00070.parquet create mode 100644 completions/completions_00080.parquet create mode 100644 completions/completions_00090.parquet create mode 100644 completions/completions_00100.parquet create mode 100644 completions/completions_00110.parquet create mode 100644 completions/completions_00120.parquet create mode 100644 completions/completions_00130.parquet create mode 100644 completions/completions_00140.parquet create mode 100644 completions/completions_00150.parquet create mode 100644 completions/completions_00160.parquet create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 last-checkpoint/chat_template.jinja create mode 100644 last-checkpoint/config.json create mode 100644 last-checkpoint/generation_config.json create mode 100644 last-checkpoint/model.safetensors create mode 100644 last-checkpoint/tokenizer.json create mode 100644 last-checkpoint/tokenizer_config.json create mode 100644 last-checkpoint/trainer_state.json create mode 100644 last-checkpoint/training_args.bin create mode 100644 model.safetensors create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..100a679 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +--- +base_model: harsha070/sft-warmup-qwen-v1 +library_name: transformers +model_name: expfinal-qwen-mbpp-s42-lambda-0p50 +tags: +- generated_from_trainer +- grpo +- trl +licence: license +--- + +# Model Card for expfinal-qwen-mbpp-s42-lambda-0p50 + +This model is a fine-tuned version of [harsha070/sft-warmup-qwen-v1](https://huggingface.co/harsha070/sft-warmup-qwen-v1). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="harsha070/expfinal-qwen-mbpp-s42-lambda-0p50", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/models-self5933/obfuscation-early-warning/runs/n7bw20up) + + + +This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300). + +### Framework versions + +- TRL: 1.3.0 +- Transformers: 5.7.0 +- Pytorch: 2.11.0 +- Datasets: 4.8.5 +- Tokenizers: 0.22.2 + +## Citations + +Cite GRPO as: + +```bibtex +@article{shao2024deepseekmath, + title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}}, + author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo}, + year = 2024, + eprint = {arXiv:2402.03300}, +} +``` + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/completions/completions_00010.parquet b/completions/completions_00010.parquet new file mode 100644 index 0000000..20910c7 --- /dev/null +++ b/completions/completions_00010.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cc2cb688023a4a85959fce662a42f319e13e96a87140f787412ef4689c68eae +size 18295 diff --git a/completions/completions_00020.parquet b/completions/completions_00020.parquet new file mode 100644 index 0000000..e9d4396 --- /dev/null +++ b/completions/completions_00020.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05168a94355234a9c1862dd8998c9ea8fd630ad883e914f36dacb99ce1428bcb +size 15441 diff --git a/completions/completions_00030.parquet b/completions/completions_00030.parquet new file mode 100644 index 0000000..6cfba01 --- /dev/null +++ b/completions/completions_00030.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66914703e375c0610f69d3da61efc82d775e145bbf5a938848be700771e1d83b +size 15875 diff --git a/completions/completions_00040.parquet b/completions/completions_00040.parquet new file mode 100644 index 0000000..c517ac2 --- /dev/null +++ b/completions/completions_00040.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed3743125bdd0f996eb130ba5eee668d7bcdf4b358bdab8245321bfe76eb792 +size 13064 diff --git a/completions/completions_00050.parquet b/completions/completions_00050.parquet new file mode 100644 index 0000000..b5a144e --- /dev/null +++ b/completions/completions_00050.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c033151bab6d8cee218ee79c0a46bd5669623224e2423487aff553f77a59e6ba +size 13269 diff --git a/completions/completions_00060.parquet b/completions/completions_00060.parquet new file mode 100644 index 0000000..bb8101b --- /dev/null +++ b/completions/completions_00060.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da64af9d7fbc5185f427f2a67b308399be6e56a4b125ecf1a637bd2c1f3478a +size 13167 diff --git a/completions/completions_00070.parquet b/completions/completions_00070.parquet new file mode 100644 index 0000000..fdb4488 --- /dev/null +++ b/completions/completions_00070.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b76aff20e23b5f2d68a037bc29c18580048ddd0eff9eaa6118ca5704cb83051c +size 12300 diff --git a/completions/completions_00080.parquet b/completions/completions_00080.parquet new file mode 100644 index 0000000..de72517 --- /dev/null +++ b/completions/completions_00080.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669ab8c3bff842c42e2a18ae3882874870b6a405f190fcf0a3e83315dbb764a7 +size 13673 diff --git a/completions/completions_00090.parquet b/completions/completions_00090.parquet new file mode 100644 index 0000000..736b116 --- /dev/null +++ b/completions/completions_00090.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d340963ec9a90f97bd1faff86a13eba29f936e26489ff984376faaf099141d +size 13861 diff --git a/completions/completions_00100.parquet b/completions/completions_00100.parquet new file mode 100644 index 0000000..5f344ed --- /dev/null +++ b/completions/completions_00100.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6714980e2b77e682532be0b78f9c9caf8ac234670584ce5999d5ef87959bb687 +size 13697 diff --git a/completions/completions_00110.parquet b/completions/completions_00110.parquet new file mode 100644 index 0000000..566ada9 --- /dev/null +++ b/completions/completions_00110.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfbb0d4e0d55154c47aa40296611d0f59b6b0e5ebdf88e28fdadc1a285181c65 +size 15012 diff --git a/completions/completions_00120.parquet b/completions/completions_00120.parquet new file mode 100644 index 0000000..f1fa4c8 --- /dev/null +++ b/completions/completions_00120.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43fe9ddc3857be0024a512f25dd33344531652f7773a0bcc301638aa2b6af717 +size 14771 diff --git a/completions/completions_00130.parquet b/completions/completions_00130.parquet new file mode 100644 index 0000000..130f769 --- /dev/null +++ b/completions/completions_00130.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c9c6ec99dd298e717c3468f45cda7b37a4eaf8ea1188eda8c2c13e1f00e1f8 +size 13297 diff --git a/completions/completions_00140.parquet b/completions/completions_00140.parquet new file mode 100644 index 0000000..bd54c18 --- /dev/null +++ b/completions/completions_00140.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d170de6065d44f7b44f1047e565631fed2b0526351c47326201390723c8d40 +size 10939 diff --git a/completions/completions_00150.parquet b/completions/completions_00150.parquet new file mode 100644 index 0000000..205cd60 --- /dev/null +++ b/completions/completions_00150.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29cf83f97f29cd7b35f196e4dfb01a58a5acba6026aed61f7be283bfb472583c +size 12116 diff --git a/completions/completions_00160.parquet b/completions/completions_00160.parquet new file mode 100644 index 0000000..e5be106 --- /dev/null +++ b/completions/completions_00160.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d0c2693dc1f09027301ae11d4281c4af6b27b8c783be9888941d8b1120c3d09 +size 12553 diff --git a/config.json b/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/chat_template.jinja b/last-checkpoint/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/last-checkpoint/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/last-checkpoint/config.json b/last-checkpoint/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/last-checkpoint/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/last-checkpoint/generation_config.json b/last-checkpoint/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/last-checkpoint/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/model.safetensors b/last-checkpoint/model.safetensors new file mode 100644 index 0000000..fcefbf3 --- /dev/null +++ b/last-checkpoint/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155c3c86c366bde86b963e906171bb76328518aa7c48daf0c8b4837ccffc075c +size 6171927112 diff --git a/last-checkpoint/tokenizer.json b/last-checkpoint/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/last-checkpoint/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/last-checkpoint/tokenizer_config.json b/last-checkpoint/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/last-checkpoint/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/last-checkpoint/trainer_state.json b/last-checkpoint/trainer_state.json new file mode 100644 index 0000000..621f751 --- /dev/null +++ b/last-checkpoint/trainer_state.json @@ -0,0 +1,482 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0666666666666667, + "eval_steps": 500, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2125, + "completions/max_length": 490.8, + "completions/max_terminated_length": 470.6, + "completions/mean_length": 414.4625, + "completions/mean_terminated_length": 395.1430999755859, + "completions/min_length": 310.5, + "completions/min_terminated_length": 310.5, + "entropy": 0.5249067967757582, + "epoch": 0.06666666666666667, + "frac_reward_zero_std": 0.05, + "grad_norm": 2.65625, + "kl": 0.03049815017875517, + "learning_rate": 9.4375e-06, + "loss": -0.010575222969055175, + "num_tokens": 46025.0, + "reward": 0.73009033203125, + "reward_std": 0.4704558838158846, + "rewards/JointRewardFunction/mean": 0.73009033203125, + "rewards/JointRewardFunction/std": 0.47045588716864584, + "step": 10, + "step_time": 21.721466124300058 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.15, + "completions/max_length": 474.6, + "completions/max_terminated_length": 432.8, + "completions/mean_length": 372.65, + "completions/mean_terminated_length": 348.14678955078125, + "completions/min_length": 280.5, + "completions/min_terminated_length": 280.5, + "entropy": 0.4361519979313016, + "epoch": 0.13333333333333333, + "frac_reward_zero_std": 0.05, + "grad_norm": 4.15625, + "kl": 0.0652532160282135, + "learning_rate": 8.8125e-06, + "loss": 0.016564452648162843, + "num_tokens": 89597.0, + "reward": 0.95604248046875, + "reward_std": 0.5059975624084473, + "rewards/JointRewardFunction/mean": 0.95604248046875, + "rewards/JointRewardFunction/std": 0.5059975773096085, + "step": 20, + "step_time": 22.023339059400495 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0375, + "completions/max_length": 425.6, + "completions/max_terminated_length": 406.9, + "completions/mean_length": 317.9, + "completions/mean_terminated_length": 309.6845275878906, + "completions/min_length": 231.4, + "completions/min_terminated_length": 231.4, + "entropy": 0.45581948235630987, + "epoch": 0.2, + "frac_reward_zero_std": 0.1, + "grad_norm": 2.859375, + "kl": 0.1008026220370084, + "learning_rate": 8.1875e-06, + "loss": 0.01793680489063263, + "num_tokens": 126445.0, + "reward": 1.2108154296875, + "reward_std": 0.40027157836593685, + "rewards/JointRewardFunction/mean": 1.2108154296875, + "rewards/JointRewardFunction/std": 0.40027157838921995, + "step": 30, + "step_time": 19.79602696299935 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1125, + "completions/max_length": 493.9, + "completions/max_terminated_length": 468.6, + "completions/mean_length": 352.1875, + "completions/mean_terminated_length": 335.66607666015625, + "completions/min_length": 238.5, + "completions/min_terminated_length": 238.5, + "entropy": 0.4116522930562496, + "epoch": 0.26666666666666666, + "frac_reward_zero_std": 0.05, + "grad_norm": 2.5625, + "kl": 0.12701121605932714, + "learning_rate": 7.5625e-06, + "loss": 0.05010480284690857, + "num_tokens": 167932.0, + "reward": 1.2074462890625, + "reward_std": 0.42208707332611084, + "rewards/JointRewardFunction/mean": 1.2074462890625, + "rewards/JointRewardFunction/std": 0.4220870822668076, + "step": 40, + "step_time": 22.582551179301117 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.025, + "completions/max_length": 393.4, + "completions/max_terminated_length": 380.0, + "completions/mean_length": 300.8625, + "completions/mean_terminated_length": 296.27321472167966, + "completions/min_length": 226.3, + "completions/min_terminated_length": 226.3, + "entropy": 0.4190680437721312, + "epoch": 0.3333333333333333, + "frac_reward_zero_std": 0.35, + "grad_norm": 3.25, + "kl": 0.13846059744246303, + "learning_rate": 6.9375e-06, + "loss": 0.03549057841300964, + "num_tokens": 204717.0, + "reward": 1.26171875, + "reward_std": 0.38662562653189525, + "rewards/JointRewardFunction/mean": 1.26171875, + "rewards/JointRewardFunction/std": 0.3866256324923597, + "step": 50, + "step_time": 18.59559666490022 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.025, + "completions/max_length": 426.2, + "completions/max_terminated_length": 414.2, + "completions/mean_length": 285.9125, + "completions/mean_terminated_length": 280.5291687011719, + "completions/min_length": 190.9, + "completions/min_terminated_length": 190.9, + "entropy": 0.3493430153466761, + "epoch": 0.4, + "frac_reward_zero_std": 0.25, + "grad_norm": 2.375, + "kl": 0.15198964411392807, + "learning_rate": 6.3125e-06, + "loss": 0.01220681592822075, + "num_tokens": 240990.0, + "reward": 1.27255859375, + "reward_std": 0.34817005618242547, + "rewards/JointRewardFunction/mean": 1.27255859375, + "rewards/JointRewardFunction/std": 0.34817005618242547, + "step": 60, + "step_time": 19.868489251599385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.025, + "completions/max_length": 370.4, + "completions/max_terminated_length": 365.0, + "completions/mean_length": 277.2875, + "completions/mean_terminated_length": 272.9107177734375, + "completions/min_length": 179.9, + "completions/min_terminated_length": 179.9, + "entropy": 0.34267437979578974, + "epoch": 0.4666666666666667, + "frac_reward_zero_std": 0.55, + "grad_norm": 0.017333984375, + "kl": 0.18903981931507588, + "learning_rate": 5.6875e-06, + "loss": 0.019876784086227416, + "num_tokens": 276969.0, + "reward": 1.381591796875, + "reward_std": 0.2380124439485371, + "rewards/JointRewardFunction/mean": 1.381591796875, + "rewards/JointRewardFunction/std": 0.23801244990900158, + "step": 70, + "step_time": 17.71040062670145 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 378.1, + "completions/max_terminated_length": 378.1, + "completions/mean_length": 271.3625, + "completions/mean_terminated_length": 271.3625, + "completions/min_length": 171.4, + "completions/min_terminated_length": 171.4, + "entropy": 0.3542415237054229, + "epoch": 0.5333333333333333, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.0751953125, + "kl": 0.19187260391190647, + "learning_rate": 5.0625e-06, + "loss": 0.0034067176282405855, + "num_tokens": 312350.0, + "reward": 1.40601806640625, + "reward_std": 0.2126459252787754, + "rewards/JointRewardFunction/mean": 1.40601806640625, + "rewards/JointRewardFunction/std": 0.21264592825900763, + "step": 80, + "step_time": 18.04391895070148 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05, + "completions/max_length": 357.6, + "completions/max_terminated_length": 341.1, + "completions/mean_length": 272.275, + "completions/mean_terminated_length": 260.9375, + "completions/min_length": 171.3, + "completions/min_terminated_length": 171.3, + "entropy": 0.31064137276262044, + "epoch": 0.6, + "frac_reward_zero_std": 0.6, + "grad_norm": 1.875, + "kl": 0.20775549318641423, + "learning_rate": 4.4375e-06, + "loss": 0.0008514203131198883, + "num_tokens": 348280.0, + "reward": 1.353466796875, + "reward_std": 0.21437984704971313, + "rewards/JointRewardFunction/mean": 1.353466796875, + "rewards/JointRewardFunction/std": 0.21437986195087433, + "step": 90, + "step_time": 17.224812426199787 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 386.0, + "completions/max_terminated_length": 357.8, + "completions/mean_length": 267.65, + "completions/mean_terminated_length": 250.7500030517578, + "completions/min_length": 169.7, + "completions/min_terminated_length": 169.7, + "entropy": 0.3662784457206726, + "epoch": 0.6666666666666666, + "frac_reward_zero_std": 0.7, + "grad_norm": 0.01239013671875, + "kl": 0.21083315466530622, + "learning_rate": 3.8125e-06, + "loss": 0.011665140837430954, + "num_tokens": 384576.0, + "reward": 1.34949951171875, + "reward_std": 0.23986690491437912, + "rewards/JointRewardFunction/mean": 1.34949951171875, + "rewards/JointRewardFunction/std": 0.2398669108748436, + "step": 100, + "step_time": 18.349693166400904 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0125, + "completions/max_length": 382.2, + "completions/max_terminated_length": 364.2, + "completions/mean_length": 275.575, + "completions/mean_terminated_length": 272.3714294433594, + "completions/min_length": 188.2, + "completions/min_terminated_length": 188.2, + "entropy": 0.37121466230601075, + "epoch": 0.7333333333333333, + "frac_reward_zero_std": 0.6, + "grad_norm": 0.0146484375, + "kl": 0.21329910093918442, + "learning_rate": 3.1875e-06, + "loss": 0.007188273221254348, + "num_tokens": 419570.0, + "reward": 1.350439453125, + "reward_std": 0.23384397297631948, + "rewards/JointRewardFunction/mean": 1.350439453125, + "rewards/JointRewardFunction/std": 0.23384397297631948, + "step": 110, + "step_time": 18.14425329649821 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0125, + "completions/max_length": 346.6, + "completions/max_terminated_length": 345.3, + "completions/mean_length": 252.15, + "completions/mean_terminated_length": 250.38750305175782, + "completions/min_length": 166.8, + "completions/min_terminated_length": 166.8, + "entropy": 0.4173679456114769, + "epoch": 0.8, + "frac_reward_zero_std": 0.45, + "grad_norm": 2.09375, + "kl": 0.21794578088447453, + "learning_rate": 2.5625e-06, + "loss": -0.0005294814705848694, + "num_tokens": 452006.0, + "reward": 1.35863037109375, + "reward_std": 0.24987269788980485, + "rewards/JointRewardFunction/mean": 1.35863037109375, + "rewards/JointRewardFunction/std": 0.24987269788980485, + "step": 120, + "step_time": 16.82072365879685 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0375, + "completions/max_length": 441.8, + "completions/max_terminated_length": 440.3, + "completions/mean_length": 301.7375, + "completions/mean_terminated_length": 296.75750122070315, + "completions/min_length": 191.0, + "completions/min_terminated_length": 191.0, + "entropy": 0.33190380278974774, + "epoch": 0.8666666666666667, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.09375, + "kl": 0.18061227248981596, + "learning_rate": 1.9375e-06, + "loss": 0.008612716197967529, + "num_tokens": 490397.0, + "reward": 1.374853515625, + "reward_std": 0.2561936320271343, + "rewards/JointRewardFunction/mean": 1.374853515625, + "rewards/JointRewardFunction/std": 0.2561936320271343, + "step": 130, + "step_time": 20.63984096989916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 340.1, + "completions/max_terminated_length": 340.1, + "completions/mean_length": 256.5, + "completions/mean_terminated_length": 256.5, + "completions/min_length": 179.7, + "completions/min_terminated_length": 179.7, + "entropy": 0.38072127737104894, + "epoch": 0.9333333333333333, + "frac_reward_zero_std": 0.7, + "grad_norm": 2.96875, + "kl": 0.21334810927510262, + "learning_rate": 1.3125000000000001e-06, + "loss": 0.012666280567646026, + "num_tokens": 523549.0, + "reward": 1.436767578125, + "reward_std": 0.153020023368299, + "rewards/JointRewardFunction/mean": 1.436767578125, + "rewards/JointRewardFunction/std": 0.153020023368299, + "step": 140, + "step_time": 16.57510228729916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0125, + "completions/max_length": 371.9, + "completions/max_terminated_length": 370.6, + "completions/mean_length": 257.4125, + "completions/mean_terminated_length": 254.5607147216797, + "completions/min_length": 169.0, + "completions/min_terminated_length": 169.0, + "entropy": 0.3929610840976238, + "epoch": 1.0, + "frac_reward_zero_std": 0.3, + "grad_norm": 0.01513671875, + "kl": 0.21818328225053846, + "learning_rate": 6.875000000000001e-07, + "loss": 0.011788636445999146, + "num_tokens": 556454.0, + "reward": 1.348779296875, + "reward_std": 0.3280519276857376, + "rewards/JointRewardFunction/mean": 1.348779296875, + "rewards/JointRewardFunction/std": 0.32805192805826666, + "step": 150, + "step_time": 17.876252979701167 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 343.0, + "completions/max_terminated_length": 343.0, + "completions/mean_length": 240.45, + "completions/mean_terminated_length": 240.45, + "completions/min_length": 176.0, + "completions/min_terminated_length": 176.0, + "entropy": 0.4010548871010542, + "epoch": 1.0666666666666667, + "frac_reward_zero_std": 0.65, + "grad_norm": 0.025146484375, + "kl": 0.22879955088719725, + "learning_rate": 6.250000000000001e-08, + "loss": 0.022160810232162476, + "num_tokens": 587606.0, + "reward": 1.3880859375, + "reward_std": 0.237497678399086, + "rewards/JointRewardFunction/mean": 1.3880859375, + "rewards/JointRewardFunction/std": 0.2374976843595505, + "step": 160, + "step_time": 16.50347660660045 + } + ], + "logging_steps": 10, + "max_steps": 160, + "num_input_tokens_seen": 587606, + "num_train_epochs": 2, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/last-checkpoint/training_args.bin b/last-checkpoint/training_args.bin new file mode 100644 index 0000000..5f0e5cb --- /dev/null +++ b/last-checkpoint/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9417437bcdb8d0a5c5a8b4411c9c02ad6ca73bb11f583f0a84d5aa9f9ab022e4 +size 7249 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..fcefbf3 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155c3c86c366bde86b963e906171bb76328518aa7c48daf0c8b4837ccffc075c +size 6171927112 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..5f0e5cb --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9417437bcdb8d0a5c5a8b4411c9c02ad6ca73bb11f583f0a84d5aa9f9ab022e4 +size 7249