commit 907adf1e81ad116211f807fcddf671330419e74f Author: ModelHub XC Date: Tue Jun 16 08:14:17 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: harsha070/exp2-qwen-mbpp-s123-lambda-0p30 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file 
mode 100644 index 0000000..a569edc --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +--- +base_model: harsha070/sft-warmup-qwen-v2 +library_name: transformers +model_name: exp2-qwen-mbpp-s123-lambda-0p30 +tags: +- generated_from_trainer +- trl +- grpo +licence: license +--- + +# Model Card for exp2-qwen-mbpp-s123-lambda-0p30 + +This model is a fine-tuned version of [harsha070/sft-warmup-qwen-v2](https://huggingface.co/harsha070/sft-warmup-qwen-v2). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="harsha070/exp2-qwen-mbpp-s123-lambda-0p30", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/models-self5933/obfuscation-early-warning/runs/abo4fexo) + + + +This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300). + +### Framework versions + +- TRL: 1.3.0 +- Transformers: 5.7.0 +- Pytorch: 2.11.0 +- Datasets: 4.8.5 +- Tokenizers: 0.22.2 + +## Citations + +Cite GRPO as: + +```bibtex +@article{shao2024deepseekmath, + title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}}, + author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. 
Wu and Daya Guo}, + year = 2024, + eprint = {arXiv:2402.03300}, +} +``` + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n</tool_call>' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- message.content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/completions/completions_00010.parquet b/completions/completions_00010.parquet new file mode 100644 index 0000000..c6756d3 --- /dev/null +++ b/completions/completions_00010.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3f1e18580d25a58eb20104c1774ae751478a07378de79082ab1afb94595930 +size 17305 diff --git a/completions/completions_00020.parquet b/completions/completions_00020.parquet new file mode 100644 index 0000000..664a8d4 --- /dev/null +++ b/completions/completions_00020.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055c700d1108180c26ab518a46e3c38afd393923acb3812edfa59f8f32d04803 +size 17851 diff --git a/completions/completions_00030.parquet 
b/completions/completions_00030.parquet new file mode 100644 index 0000000..2305d25 --- /dev/null +++ b/completions/completions_00030.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04bcf31ff33fe6ea9b2468bb019a902f519dfe50957a49b062342f933ddaeda4 +size 18000 diff --git a/completions/completions_00040.parquet b/completions/completions_00040.parquet new file mode 100644 index 0000000..c6865da --- /dev/null +++ b/completions/completions_00040.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9149187e87442087092d429b25cf235311b2b31398a0788f9156a959574061b +size 19005 diff --git a/completions/completions_00050.parquet b/completions/completions_00050.parquet new file mode 100644 index 0000000..f11be20 --- /dev/null +++ b/completions/completions_00050.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc9e23b6b0217eebf4f8e144a1a261f2eeff23e9c7ca52606a347fa6ee46424 +size 17152 diff --git a/completions/completions_00060.parquet b/completions/completions_00060.parquet new file mode 100644 index 0000000..9cf05b8 --- /dev/null +++ b/completions/completions_00060.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d52c8f8974f3e788f7b78717ec4d04a37d702c7bc194c84e1b7c513b28dd7d8 +size 18286 diff --git a/completions/completions_00070.parquet b/completions/completions_00070.parquet new file mode 100644 index 0000000..4daaa72 --- /dev/null +++ b/completions/completions_00070.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eea2c5fa1f49955cd07ddfdab5afa84fcafa0d6a3c15f710de529c7053683ca +size 14647 diff --git a/completions/completions_00080.parquet b/completions/completions_00080.parquet new file mode 100644 index 0000000..c37ea12 --- /dev/null +++ b/completions/completions_00080.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5bde669bd2b7afadeeff1a99249c5c98f765fb471beb1644365610711d9ca93 +size 
17326 diff --git a/completions/completions_00090.parquet b/completions/completions_00090.parquet new file mode 100644 index 0000000..6843265 --- /dev/null +++ b/completions/completions_00090.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d09321c93c1f3bf9e168995108d862741a211f774ec66619b22de38c1b8d98e +size 15774 diff --git a/completions/completions_00100.parquet b/completions/completions_00100.parquet new file mode 100644 index 0000000..e304aac --- /dev/null +++ b/completions/completions_00100.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed82d1b249a0488aab379a8022714e425c3e97989ab0d65bd78295212f8ef172 +size 15796 diff --git a/completions/completions_00110.parquet b/completions/completions_00110.parquet new file mode 100644 index 0000000..367c85b --- /dev/null +++ b/completions/completions_00110.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab7554d546c9e0fe80ec6d18bf96ecec278a494f24d02bc918fea7d01020894c +size 17088 diff --git a/completions/completions_00120.parquet b/completions/completions_00120.parquet new file mode 100644 index 0000000..a18f7ac --- /dev/null +++ b/completions/completions_00120.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7c0a65dfd08556a3133d58bd270a573dd6187916c0c647bc841bf7f1e945ff +size 16141 diff --git a/completions/completions_00130.parquet b/completions/completions_00130.parquet new file mode 100644 index 0000000..1f236f1 --- /dev/null +++ b/completions/completions_00130.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3205d64a6822b4af23957b5631e3e819ecbbd6beac0cecd5403d1aa3adca2bf6 +size 16607 diff --git a/completions/completions_00140.parquet b/completions/completions_00140.parquet new file mode 100644 index 0000000..5228608 --- /dev/null +++ b/completions/completions_00140.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ec02a7551ddcb1ed03eb0d8c88befa6236db0d7cb423ae53713abc1a9d9018f6 +size 16170 diff --git a/completions/completions_00150.parquet b/completions/completions_00150.parquet new file mode 100644 index 0000000..b9292cb --- /dev/null +++ b/completions/completions_00150.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188a11a1823202f1a18c289291509605a7649546251296b9c20503734ce32fbb +size 16893 diff --git a/completions/completions_00160.parquet b/completions/completions_00160.parquet new file mode 100644 index 0000000..de5b8bb --- /dev/null +++ b/completions/completions_00160.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a02aef19c00e0a9beaced3c418e5f5e57338e584646fc23ca3b9a3b5aece3c3d +size 17790 diff --git a/completions/completions_00170.parquet b/completions/completions_00170.parquet new file mode 100644 index 0000000..300e71a --- /dev/null +++ b/completions/completions_00170.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3ab2b5841b555fae25c87fae920969140d616ae6e32615db2f8acd9cf9f0f0 +size 15681 diff --git a/completions/completions_00180.parquet b/completions/completions_00180.parquet new file mode 100644 index 0000000..443d989 --- /dev/null +++ b/completions/completions_00180.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3e20dac106f7ae6c0819eb46b9863477fef26399696ff2a996486180f86e58a +size 16840 diff --git a/completions/completions_00190.parquet b/completions/completions_00190.parquet new file mode 100644 index 0000000..5ca780f --- /dev/null +++ b/completions/completions_00190.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6d13339eb2f72ff81df0f817e1aab4b1179fdb0ab2b2216c055ae98e8289f51 +size 15536 diff --git a/completions/completions_00200.parquet b/completions/completions_00200.parquet new file mode 100644 index 0000000..6d97dca --- /dev/null +++ b/completions/completions_00200.parquet @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b22f1caf5c32f8f3f0fb4caf001d3a6f0257cff04a5fff2936c77b3910766d +size 16902 diff --git a/completions/completions_00210.parquet b/completions/completions_00210.parquet new file mode 100644 index 0000000..1b8768a --- /dev/null +++ b/completions/completions_00210.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb9b261614dee196dd764f57484f61eea6d19cd1c7a9c86329ba00d733755a89 +size 16814 diff --git a/completions/completions_00220.parquet b/completions/completions_00220.parquet new file mode 100644 index 0000000..5376a05 --- /dev/null +++ b/completions/completions_00220.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e46a6aa963385e7cc9737da84ab036ea27780a6384eb600a27e9697e5276e3 +size 17366 diff --git a/completions/completions_00230.parquet b/completions/completions_00230.parquet new file mode 100644 index 0000000..1f03819 --- /dev/null +++ b/completions/completions_00230.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc568b9c16ddb9804f43edf00eca1d1e165e63ede40a863ffeb94009676e33e0 +size 18892 diff --git a/completions/completions_00240.parquet b/completions/completions_00240.parquet new file mode 100644 index 0000000..2262c53 --- /dev/null +++ b/completions/completions_00240.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06723e9dcb8f90e23a15da1d1637a3d2f7dcacddfd604bfb99be14ceb5d3a8c6 +size 17938 diff --git a/completions/completions_00250.parquet b/completions/completions_00250.parquet new file mode 100644 index 0000000..68ad224 --- /dev/null +++ b/completions/completions_00250.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed61888df51f06a9a055e6bb6856803e01402f50d8256102174de3bceace27e +size 16142 diff --git a/completions/completions_00260.parquet b/completions/completions_00260.parquet new file mode 100644 index 0000000..a3838fe --- /dev/null +++ 
b/completions/completions_00260.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7dc39286a77d46e6b326ac98c230c8e87b465fa2739d61df6afe520a62b7505 +size 16255 diff --git a/completions/completions_00270.parquet b/completions/completions_00270.parquet new file mode 100644 index 0000000..12c8efc --- /dev/null +++ b/completions/completions_00270.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:759596e38116b59d7548f550b9b5d0132fd97b6a62607ea6856de8d560927581 +size 18470 diff --git a/completions/completions_00280.parquet b/completions/completions_00280.parquet new file mode 100644 index 0000000..6ef5a0e --- /dev/null +++ b/completions/completions_00280.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd9144693e56f2a2bbe74bfdb0fdd9733cba229c31c825ed2dab95c932d60b2 +size 16339 diff --git a/completions/completions_00290.parquet b/completions/completions_00290.parquet new file mode 100644 index 0000000..19d0298 --- /dev/null +++ b/completions/completions_00290.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f723757857de9258dfeae8e7df295f00f99f69b363ad00d90dabe1fecb3bb264 +size 16979 diff --git a/completions/completions_00300.parquet b/completions/completions_00300.parquet new file mode 100644 index 0000000..0cc7496 --- /dev/null +++ b/completions/completions_00300.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4650491852dcfe71eba1e6e7c65a38081d353831aee799874cb28430e8ebc66f +size 17186 diff --git a/config.json b/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + 
"full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/chat_template.jinja b/last-checkpoint/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/last-checkpoint/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n</tool_call>' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- message.content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/last-checkpoint/config.json b/last-checkpoint/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ 
b/last-checkpoint/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/last-checkpoint/generation_config.json b/last-checkpoint/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/last-checkpoint/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/model.safetensors 
b/last-checkpoint/model.safetensors new file mode 100644 index 0000000..31889c1 --- /dev/null +++ b/last-checkpoint/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9acc73aed31eb69bafee378050f80493e203990d25ceb8c9c3f076e4e148bc8 +size 6171927112 diff --git a/last-checkpoint/tokenizer.json b/last-checkpoint/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/last-checkpoint/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/last-checkpoint/tokenizer_config.json b/last-checkpoint/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/last-checkpoint/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/last-checkpoint/trainer_state.json b/last-checkpoint/trainer_state.json new file mode 100644 index 0000000..520d413 --- /dev/null +++ b/last-checkpoint/trainer_state.json @@ -0,0 +1,874 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + 
"clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4125, + "completions/max_length": 499.4, + "completions/max_terminated_length": 416.1, + "completions/mean_length": 440.2, + "completions/mean_terminated_length": 360.0207153320313, + "completions/min_length": 351.3, + "completions/min_terminated_length": 300.1, + "entropy": 0.35336310751736166, + "epoch": 0.06666666666666667, + "frac_reward_zero_std": 0.15, + "grad_norm": 2.46875, + "kl": 0.04995681893542496, + "learning_rate": 9.7e-06, + "loss": 0.007881630957126618, + "num_tokens": 48280.0, + "reward": 0.6638354301452637, + "reward_std": 0.4613180309534073, + "rewards/JointRewardFunction/mean": 0.6638354301452637, + "rewards/JointRewardFunction/std": 0.46131803542375566, + "step": 10, + "step_time": 21.880370603101618 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4125, + "completions/max_length": 505.1, + "completions/max_terminated_length": 485.6, + "completions/mean_length": 454.75, + "completions/mean_terminated_length": 429.1646453857422, + "completions/min_length": 359.6, + "completions/min_terminated_length": 359.6, + "entropy": 0.40911334455013276, + "epoch": 0.13333333333333333, + "frac_reward_zero_std": 0.1, + "grad_norm": 1.859375, + "kl": 0.06663629063405097, + "learning_rate": 9.366666666666668e-06, + "loss": -0.007935921847820281, + "num_tokens": 97272.0, + "reward": 0.742634254693985, + "reward_std": 0.5053252905607224, + "rewards/JointRewardFunction/mean": 0.742634254693985, + "rewards/JointRewardFunction/std": 0.505325311422348, + "step": 20, + "step_time": 22.897377014198717 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + 
"completions/clipped_ratio": 0.3625, + "completions/max_length": 496.7, + "completions/max_terminated_length": 476.1, + "completions/mean_length": 449.0625, + "completions/mean_terminated_length": 423.77500610351564, + "completions/min_length": 347.1, + "completions/min_terminated_length": 347.1, + "entropy": 0.5090887371450663, + "epoch": 0.2, + "frac_reward_zero_std": 0.25, + "grad_norm": 1.40625, + "kl": 0.05061149680987, + "learning_rate": 9.033333333333334e-06, + "loss": -0.0031354159116744997, + "num_tokens": 145589.0, + "reward": 0.9517577826976776, + "reward_std": 0.356577847735025, + "rewards/JointRewardFunction/mean": 0.9517577826976776, + "rewards/JointRewardFunction/std": 0.35657785963267086, + "step": 30, + "step_time": 22.42113570249967 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 508.9, + "completions/max_terminated_length": 420.2, + "completions/mean_length": 447.0, + "completions/mean_terminated_length": 361.18226623535156, + "completions/min_length": 350.8, + "completions/min_terminated_length": 299.6, + "entropy": 0.5721639156341553, + "epoch": 0.26666666666666666, + "frac_reward_zero_std": 0.1, + "grad_norm": 2.765625, + "kl": 0.05449348199181259, + "learning_rate": 8.700000000000001e-06, + "loss": -0.0012190598994493485, + "num_tokens": 195445.0, + "reward": 0.9749389350414276, + "reward_std": 0.41350028812885287, + "rewards/JointRewardFunction/mean": 0.9749389350414276, + "rewards/JointRewardFunction/std": 0.4135002911090851, + "step": 40, + "step_time": 22.95661881720298 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1625, + "completions/max_length": 467.3, + "completions/max_terminated_length": 454.5, + 
"completions/mean_length": 390.775, + "completions/mean_terminated_length": 377.6511932373047, + "completions/min_length": 296.3, + "completions/min_terminated_length": 296.3, + "entropy": 0.6591073881834746, + "epoch": 0.3333333333333333, + "frac_reward_zero_std": 0.45, + "grad_norm": 3.03125, + "kl": 0.08812928411643953, + "learning_rate": 8.366666666666667e-06, + "loss": -0.008682972937822341, + "num_tokens": 240011.0, + "reward": 1.1314965546131135, + "reward_std": 0.2849295660853386, + "rewards/JointRewardFunction/mean": 1.1314965546131135, + "rewards/JointRewardFunction/std": 0.2849295552819967, + "step": 50, + "step_time": 21.223023884699796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.175, + "completions/max_length": 473.6, + "completions/max_terminated_length": 447.8, + "completions/mean_length": 379.525, + "completions/mean_terminated_length": 354.0325042724609, + "completions/min_length": 264.9, + "completions/min_terminated_length": 264.9, + "entropy": 0.6001430394127965, + "epoch": 0.4, + "frac_reward_zero_std": 0.35, + "grad_norm": 2.71875, + "kl": 0.07532973024062813, + "learning_rate": 8.033333333333335e-06, + "loss": 0.010028349608182907, + "num_tokens": 283417.0, + "reward": 1.055295366048813, + "reward_std": 0.3583150297403336, + "rewards/JointRewardFunction/mean": 1.055295366048813, + "rewards/JointRewardFunction/std": 0.358315047621727, + "step": 60, + "step_time": 21.51059049019932 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.075, + "completions/max_length": 491.9, + "completions/max_terminated_length": 471.2, + "completions/mean_length": 376.875, + "completions/mean_terminated_length": 364.50536193847654, + "completions/min_length": 270.7, + 
"completions/min_terminated_length": 270.7, + "entropy": 0.5387065753340721, + "epoch": 0.4666666666666667, + "frac_reward_zero_std": 0.6, + "grad_norm": 1.9375, + "kl": 0.07996222919318825, + "learning_rate": 7.7e-06, + "loss": 0.01158405989408493, + "num_tokens": 326415.0, + "reward": 1.21389399766922, + "reward_std": 0.20652158036828042, + "rewards/JointRewardFunction/mean": 1.21389399766922, + "rewards/JointRewardFunction/std": 0.20652157738804816, + "step": 70, + "step_time": 22.325782465600422 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 452.9, + "completions/max_terminated_length": 424.3, + "completions/mean_length": 341.4625, + "completions/mean_terminated_length": 331.04286193847656, + "completions/min_length": 254.4, + "completions/min_terminated_length": 254.4, + "entropy": 0.6442078746855259, + "epoch": 0.5333333333333333, + "frac_reward_zero_std": 0.6, + "grad_norm": 2.8125, + "kl": 0.12920588869601488, + "learning_rate": 7.3666666666666676e-06, + "loss": 0.026441246271133423, + "num_tokens": 366144.0, + "reward": 1.1669726014137267, + "reward_std": 0.2815501570701599, + "rewards/JointRewardFunction/mean": 1.1669726014137267, + "rewards/JointRewardFunction/std": 0.28155014626681807, + "step": 80, + "step_time": 20.707106610499977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.075, + "completions/max_length": 458.6, + "completions/max_terminated_length": 436.6, + "completions/mean_length": 344.7875, + "completions/mean_terminated_length": 332.6904815673828, + "completions/min_length": 257.1, + "completions/min_terminated_length": 257.1, + "entropy": 0.6171283535659313, + "epoch": 0.6, + "frac_reward_zero_std": 0.45, + 
"grad_norm": 0.1728515625, + "kl": 0.11114234835840761, + "learning_rate": 7.033333333333334e-06, + "loss": -0.0013597654178738594, + "num_tokens": 406607.0, + "reward": 1.0987597286701203, + "reward_std": 0.3017842784523964, + "rewards/JointRewardFunction/mean": 1.0987597286701203, + "rewards/JointRewardFunction/std": 0.3017842710018158, + "step": 90, + "step_time": 21.128214740598196 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 445.1, + "completions/max_terminated_length": 418.1, + "completions/mean_length": 350.3625, + "completions/mean_terminated_length": 341.49822387695315, + "completions/min_length": 258.7, + "completions/min_terminated_length": 258.7, + "entropy": 0.7072938833385706, + "epoch": 0.6666666666666666, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.6875, + "kl": 0.10777388750575483, + "learning_rate": 6.700000000000001e-06, + "loss": 0.009119665622711182, + "num_tokens": 446820.0, + "reward": 1.167529249191284, + "reward_std": 0.27103030947037043, + "rewards/JointRewardFunction/mean": 1.167529249191284, + "rewards/JointRewardFunction/std": 0.2710303008556366, + "step": 100, + "step_time": 20.569578059400374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1, + "completions/max_length": 497.2, + "completions/max_terminated_length": 483.4, + "completions/mean_length": 386.8, + "completions/mean_terminated_length": 376.6464324951172, + "completions/min_length": 274.3, + "completions/min_terminated_length": 274.3, + "entropy": 0.5864197112619877, + "epoch": 0.7333333333333333, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.953125, + "kl": 0.10330515620298683, + "learning_rate": 6.366666666666668e-06, + "loss": 
0.004881048575043678, + "num_tokens": 491392.0, + "reward": 1.1497362732887269, + "reward_std": 0.2508866846153978, + "rewards/JointRewardFunction/mean": 1.1497362732887269, + "rewards/JointRewardFunction/std": 0.25088667746749704, + "step": 110, + "step_time": 22.38237403490093 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1375, + "completions/max_length": 489.8, + "completions/max_terminated_length": 455.9, + "completions/mean_length": 402.7, + "completions/mean_terminated_length": 384.0738189697266, + "completions/min_length": 297.5, + "completions/min_terminated_length": 297.5, + "entropy": 0.5570502711459995, + "epoch": 0.8, + "frac_reward_zero_std": 0.45, + "grad_norm": 2.265625, + "kl": 0.08270290573127567, + "learning_rate": 6.033333333333335e-06, + "loss": 0.013686606287956237, + "num_tokens": 537628.0, + "reward": 1.0768773972988128, + "reward_std": 0.32933869063854215, + "rewards/JointRewardFunction/mean": 1.0768773972988128, + "rewards/JointRewardFunction/std": 0.32933869063854215, + "step": 120, + "step_time": 22.374344250299693 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1625, + "completions/max_length": 474.2, + "completions/max_terminated_length": 460.2, + "completions/mean_length": 382.75, + "completions/mean_terminated_length": 366.1206024169922, + "completions/min_length": 286.6, + "completions/min_terminated_length": 286.6, + "entropy": 0.5301733467727899, + "epoch": 0.8666666666666667, + "frac_reward_zero_std": 0.6, + "grad_norm": 2.484375, + "kl": 0.09477438307367266, + "learning_rate": 5.7e-06, + "loss": -0.0015326094813644885, + "num_tokens": 583628.0, + "reward": 1.1464037537574767, + "reward_std": 0.26481162309646605, + 
"rewards/JointRewardFunction/mean": 1.1464037537574767, + "rewards/JointRewardFunction/std": 0.264811622351408, + "step": 130, + "step_time": 21.819646276899583 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.175, + "completions/max_length": 492.1, + "completions/max_terminated_length": 456.2, + "completions/mean_length": 383.6375, + "completions/mean_terminated_length": 364.0131011962891, + "completions/min_length": 279.7, + "completions/min_terminated_length": 279.7, + "entropy": 0.45302344355732205, + "epoch": 0.9333333333333333, + "frac_reward_zero_std": 0.25, + "grad_norm": 3.4375, + "kl": 0.07856867800001055, + "learning_rate": 5.366666666666666e-06, + "loss": 0.027430105209350585, + "num_tokens": 628107.0, + "reward": 1.1271068811416627, + "reward_std": 0.3177687225921545, + "rewards/JointRewardFunction/mean": 1.1271068811416627, + "rewards/JointRewardFunction/std": 0.31776872408227064, + "step": 140, + "step_time": 22.27468511669831 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.175, + "completions/max_length": 488.7, + "completions/max_terminated_length": 460.8, + "completions/mean_length": 384.025, + "completions/mean_terminated_length": 360.3232177734375, + "completions/min_length": 279.9, + "completions/min_terminated_length": 279.9, + "entropy": 0.4796965181827545, + "epoch": 1.0, + "frac_reward_zero_std": 0.4, + "grad_norm": 0.08740234375, + "kl": 0.09200209667906165, + "learning_rate": 5.033333333333333e-06, + "loss": -0.009858855605125427, + "num_tokens": 671557.0, + "reward": 1.1115014195442199, + "reward_std": 0.3146645646542311, + "rewards/JointRewardFunction/mean": 1.1115014195442199, + "rewards/JointRewardFunction/std": 0.3146645750850439, + "step": 150, 
+ "step_time": 22.151629410100213 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.225, + "completions/max_length": 501.1, + "completions/max_terminated_length": 447.8, + "completions/mean_length": 400.075, + "completions/mean_terminated_length": 361.0351318359375, + "completions/min_length": 285.0, + "completions/min_terminated_length": 285.0, + "entropy": 0.4785544477403164, + "epoch": 1.0666666666666667, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.0, + "kl": 0.0879205574747175, + "learning_rate": 4.7e-06, + "loss": 0.017533975839614867, + "num_tokens": 716935.0, + "reward": 1.1597753405570983, + "reward_std": 0.29188001044094564, + "rewards/JointRewardFunction/mean": 1.1597753405570983, + "rewards/JointRewardFunction/std": 0.29188000336289405, + "step": 160, + "step_time": 22.98355916679975 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 467.4, + "completions/max_terminated_length": 460.7, + "completions/mean_length": 363.275, + "completions/mean_terminated_length": 354.8253631591797, + "completions/min_length": 251.6, + "completions/min_terminated_length": 251.6, + "entropy": 0.5262451708316803, + "epoch": 1.1333333333333333, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.060546875, + "kl": 0.09955117981880904, + "learning_rate": 4.366666666666667e-06, + "loss": -0.007418201863765716, + "num_tokens": 757865.0, + "reward": 1.1451659560203553, + "reward_std": 0.3273365020751953, + "rewards/JointRewardFunction/mean": 1.1451659560203553, + "rewards/JointRewardFunction/std": 0.3273364961147308, + "step": 170, + "step_time": 21.108805562603084 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, 
+ "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0125, + "completions/max_length": 452.5, + "completions/max_terminated_length": 448.2, + "completions/mean_length": 337.25, + "completions/mean_terminated_length": 335.1607147216797, + "completions/min_length": 241.7, + "completions/min_terminated_length": 241.7, + "entropy": 0.5567022401839494, + "epoch": 1.2, + "frac_reward_zero_std": 0.7, + "grad_norm": 0.068359375, + "kl": 0.09737768466584384, + "learning_rate": 4.033333333333333e-06, + "loss": 0.018832828104496, + "num_tokens": 796505.0, + "reward": 1.2489745497703553, + "reward_std": 0.11990191522636451, + "rewards/JointRewardFunction/mean": 1.2489745497703553, + "rewards/JointRewardFunction/std": 0.11990190770593472, + "step": 180, + "step_time": 20.626747212697957 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1375, + "completions/max_length": 473.7, + "completions/max_terminated_length": 441.8, + "completions/mean_length": 373.1875, + "completions/mean_terminated_length": 352.4538177490234, + "completions/min_length": 259.1, + "completions/min_terminated_length": 259.1, + "entropy": 0.5100005997344852, + "epoch": 1.2666666666666666, + "frac_reward_zero_std": 0.7, + "grad_norm": 0.039794921875, + "kl": 0.09228390976786613, + "learning_rate": 3.7e-06, + "loss": -0.000696965865790844, + "num_tokens": 839088.0, + "reward": 1.23026362657547, + "reward_std": 0.14840476661920549, + "rewards/JointRewardFunction/mean": 1.23026362657547, + "rewards/JointRewardFunction/std": 0.14840476512908934, + "step": 190, + "step_time": 21.574640466592246 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1375, + "completions/max_length": 480.2, + 
"completions/max_terminated_length": 459.8, + "completions/mean_length": 389.75, + "completions/mean_terminated_length": 372.77679443359375, + "completions/min_length": 285.0, + "completions/min_terminated_length": 285.0, + "entropy": 0.5184403497725725, + "epoch": 1.3333333333333333, + "frac_reward_zero_std": 0.55, + "grad_norm": 2.1875, + "kl": 0.08728140082675964, + "learning_rate": 3.366666666666667e-06, + "loss": 0.021845726668834685, + "num_tokens": 883220.0, + "reward": 1.181435489654541, + "reward_std": 0.24119414222077468, + "rewards/JointRewardFunction/mean": 1.181435489654541, + "rewards/JointRewardFunction/std": 0.24119413328007794, + "step": 200, + "step_time": 21.928263508094822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.075, + "completions/max_length": 460.3, + "completions/max_terminated_length": 452.6, + "completions/mean_length": 375.4625, + "completions/mean_terminated_length": 366.30833740234374, + "completions/min_length": 278.2, + "completions/min_terminated_length": 278.2, + "entropy": 0.5214369297027588, + "epoch": 1.4, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.4375, + "kl": 0.0887975221965462, + "learning_rate": 3.0333333333333337e-06, + "loss": 0.01691504716873169, + "num_tokens": 926517.0, + "reward": 1.2304613709449768, + "reward_std": 0.189005006296793, + "rewards/JointRewardFunction/mean": 1.2304613709449768, + "rewards/JointRewardFunction/std": 0.18900499549345112, + "step": 210, + "step_time": 21.30131801480311 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1, + "completions/max_length": 470.3, + "completions/max_terminated_length": 448.9, + "completions/mean_length": 365.95, + "completions/mean_terminated_length": 
351.73179016113284, + "completions/min_length": 272.3, + "completions/min_terminated_length": 272.3, + "entropy": 0.5099331840872765, + "epoch": 1.4666666666666668, + "frac_reward_zero_std": 0.6, + "grad_norm": 2.0, + "kl": 0.09267634809948504, + "learning_rate": 2.7000000000000004e-06, + "loss": -0.004276449233293534, + "num_tokens": 969621.0, + "reward": 1.0979979991912843, + "reward_std": 0.326889356970787, + "rewards/JointRewardFunction/mean": 1.0979979991912843, + "rewards/JointRewardFunction/std": 0.32688935101032257, + "step": 220, + "step_time": 21.54177276209375 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0125, + "completions/max_length": 437.0, + "completions/max_terminated_length": 435.8, + "completions/mean_length": 325.4375, + "completions/mean_terminated_length": 323.2982147216797, + "completions/min_length": 243.3, + "completions/min_terminated_length": 243.3, + "entropy": 0.527520533464849, + "epoch": 1.5333333333333332, + "frac_reward_zero_std": 0.5, + "grad_norm": 3.609375, + "kl": 0.09654896147549152, + "learning_rate": 2.3666666666666667e-06, + "loss": 0.004230192676186561, + "num_tokens": 1007468.0, + "reward": 1.1674120664596557, + "reward_std": 0.3256095230579376, + "rewards/JointRewardFunction/mean": 1.1674120664596557, + "rewards/JointRewardFunction/std": 0.32560951411724093, + "step": 230, + "step_time": 20.321823318899987 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 471.3, + "completions/max_terminated_length": 445.0, + "completions/mean_length": 387.2, + "completions/mean_terminated_length": 362.8750030517578, + "completions/min_length": 287.0, + "completions/min_terminated_length": 287.0, + "entropy": 
0.5140835266560316, + "epoch": 1.6, + "frac_reward_zero_std": 0.35, + "grad_norm": 1.65625, + "kl": 0.08105785646475852, + "learning_rate": 2.0333333333333335e-06, + "loss": 0.0024391064420342446, + "num_tokens": 1053144.0, + "reward": 1.1341503262519836, + "reward_std": 0.3189578216522932, + "rewards/JointRewardFunction/mean": 1.1341503262519836, + "rewards/JointRewardFunction/std": 0.3189578127115965, + "step": 240, + "step_time": 21.733667162401254 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 476.7, + "completions/max_terminated_length": 449.8, + "completions/mean_length": 376.975, + "completions/mean_terminated_length": 357.80071716308595, + "completions/min_length": 280.1, + "completions/min_terminated_length": 280.1, + "entropy": 0.5188658468425273, + "epoch": 1.6666666666666665, + "frac_reward_zero_std": 0.65, + "grad_norm": 1.40625, + "kl": 0.09223291147500276, + "learning_rate": 1.7000000000000002e-06, + "loss": 0.003794506937265396, + "num_tokens": 1096622.0, + "reward": 1.2100683093070983, + "reward_std": 0.1948750299634412, + "rewards/JointRewardFunction/mean": 1.2100683093070983, + "rewards/JointRewardFunction/std": 0.19487502239644527, + "step": 250, + "step_time": 21.73429901890122 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05, + "completions/max_length": 478.8, + "completions/max_terminated_length": 468.3, + "completions/mean_length": 366.125, + "completions/mean_terminated_length": 358.8738159179687, + "completions/min_length": 261.0, + "completions/min_terminated_length": 261.0, + "entropy": 0.547977139428258, + "epoch": 1.7333333333333334, + "frac_reward_zero_std": 0.45, + "grad_norm": 1.734375, + "kl": 
0.09155708220787347, + "learning_rate": 1.3666666666666668e-06, + "loss": -0.014781329035758971, + "num_tokens": 1138920.0, + "reward": 1.15995112657547, + "reward_std": 0.32106488235294817, + "rewards/JointRewardFunction/mean": 1.15995112657547, + "rewards/JointRewardFunction/std": 0.32106486298143866, + "step": 260, + "step_time": 21.99600164630174 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1375, + "completions/max_length": 463.6, + "completions/max_terminated_length": 438.9, + "completions/mean_length": 375.1, + "completions/mean_terminated_length": 358.83917541503905, + "completions/min_length": 273.2, + "completions/min_terminated_length": 273.2, + "entropy": 0.5359004020690918, + "epoch": 1.8, + "frac_reward_zero_std": 0.6, + "grad_norm": 1.9140625, + "kl": 0.087837297283113, + "learning_rate": 1.0333333333333333e-06, + "loss": 0.012757700681686402, + "num_tokens": 1182736.0, + "reward": 1.213478970527649, + "reward_std": 0.17389502958394587, + "rewards/JointRewardFunction/mean": 1.213478970527649, + "rewards/JointRewardFunction/std": 0.17389501919969916, + "step": 270, + "step_time": 21.312464608701703 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0875, + "completions/max_length": 480.7, + "completions/max_terminated_length": 456.9, + "completions/mean_length": 372.65, + "completions/mean_terminated_length": 360.74524841308596, + "completions/min_length": 277.9, + "completions/min_terminated_length": 277.9, + "entropy": 0.5030612323433161, + "epoch": 1.8666666666666667, + "frac_reward_zero_std": 0.6, + "grad_norm": 1.4140625, + "kl": 0.08702772008255125, + "learning_rate": 7.000000000000001e-07, + "loss": 0.019132000207901, + "num_tokens": 1225656.0, + "reward": 
1.2122631311416625, + "reward_std": 0.19066368174389936, + "rewards/JointRewardFunction/mean": 1.2122631311416625, + "rewards/JointRewardFunction/std": 0.19066367280320265, + "step": 280, + "step_time": 21.90045202969777 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.2125, + "completions/max_length": 477.3, + "completions/max_terminated_length": 440.9, + "completions/mean_length": 382.5, + "completions/mean_terminated_length": 354.6571441650391, + "completions/min_length": 282.6, + "completions/min_terminated_length": 282.6, + "entropy": 0.5357337659224868, + "epoch": 1.9333333333333333, + "frac_reward_zero_std": 0.5, + "grad_norm": 1.765625, + "kl": 0.0854645582381636, + "learning_rate": 3.666666666666667e-07, + "loss": -0.011803697794675827, + "num_tokens": 1272012.0, + "reward": 1.1109179258346558, + "reward_std": 0.31746507063508034, + "rewards/JointRewardFunction/mean": 1.1109179258346558, + "rewards/JointRewardFunction/std": 0.31746507063508034, + "step": 290, + "step_time": 21.865317538700765 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.175, + "completions/max_length": 476.9, + "completions/max_terminated_length": 435.9, + "completions/mean_length": 365.8, + "completions/mean_terminated_length": 339.0707153320312, + "completions/min_length": 260.3, + "completions/min_terminated_length": 260.3, + "entropy": 0.5455268980935216, + "epoch": 2.0, + "frac_reward_zero_std": 0.45, + "grad_norm": 1.546875, + "kl": 0.08734047506004572, + "learning_rate": 3.333333333333334e-08, + "loss": -0.005074360966682434, + "num_tokens": 1314476.0, + "reward": 1.0770751595497132, + "reward_std": 0.38892474174499514, + "rewards/JointRewardFunction/mean": 1.0770751595497132, + 
"rewards/JointRewardFunction/std": 0.3889247328042984, + "step": 300, + "step_time": 21.719864126896574 + } + ], + "logging_steps": 10, + "max_steps": 300, + "num_input_tokens_seen": 1314476, + "num_train_epochs": 2, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/last-checkpoint/training_args.bin b/last-checkpoint/training_args.bin new file mode 100644 index 0000000..83b5a72 --- /dev/null +++ b/last-checkpoint/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c7a26e3c4989be7698151c66fede0eed187bf5184ad92fbc45ccf806e19414 +size 7249 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..31889c1 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9acc73aed31eb69bafee378050f80493e203990d25ceb8c9c3f076e4e148bc8 +size 6171927112 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + 
"<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..83b5a72 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c7a26e3c4989be7698151c66fede0eed187bf5184ad92fbc45ccf806e19414 +size 7249