commit e9d6237854a0fca86b3a6dddb2bc6fac9bdc2eb7 Author: ModelHub XC Date: Tue Jun 16 06:15:17 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: harsha070/exp2-qwen-mbpp-s42-lambda-0p25 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..918bc5d --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +--- +base_model: harsha070/sft-warmup-qwen-v1 +library_name: transformers +model_name: exp2-qwen-mbpp-s42-lambda-0p25 +tags: +- generated_from_trainer +- trl +- grpo +licence: license +--- + +# Model Card for exp2-qwen-mbpp-s42-lambda-0p25 + +This model is a fine-tuned version of [harsha070/sft-warmup-qwen-v1](https://huggingface.co/harsha070/sft-warmup-qwen-v1). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="harsha070/exp2-qwen-mbpp-s42-lambda-0p25", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/models-self5933/obfuscation-early-warning/runs/0pjebn91) + + + +This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300). + +### Framework versions + +- TRL: 1.3.0 +- Transformers: 5.7.0 +- Pytorch: 2.11.0 +- Datasets: 4.8.5 +- Tokenizers: 0.22.2 + +## Citations + +Cite GRPO as: + +```bibtex +@article{shao2024deepseekmath, + title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}}, + author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo}, + year = 2024, + eprint = {arXiv:2402.03300}, +} +``` + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/completions/completions_00010.parquet b/completions/completions_00010.parquet new file mode 100644 index 0000000..7f429e0 --- /dev/null +++ b/completions/completions_00010.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b0bbcd8288475d10ff51c0a3b2c94e4aba2f59bfb390f43371805becb83324 +size 18571 diff --git a/completions/completions_00020.parquet b/completions/completions_00020.parquet new file mode 100644 index 0000000..b319212 --- /dev/null +++ b/completions/completions_00020.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b256bdb82db0190a21975217aa841c67d74931bf8c3ffa00453a8fefb78c106f +size 17589 diff --git a/completions/completions_00030.parquet b/completions/completions_00030.parquet new file mode 100644 index 0000000..89118bd --- /dev/null +++ b/completions/completions_00030.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079488236c30234dc3a34d3888783dd18499b439b51d2e4dd954add2d614d2d2 +size 18934 diff --git a/completions/completions_00040.parquet b/completions/completions_00040.parquet new file mode 100644 index 0000000..081b141 --- /dev/null +++ b/completions/completions_00040.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cb0735f11a6886e2f64e59c95a51229ba0c381ce1423c80c3410ebfb8686abf +size 19177 diff --git a/completions/completions_00050.parquet b/completions/completions_00050.parquet new file mode 100644 index 0000000..59f335e --- /dev/null +++ b/completions/completions_00050.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24aecf939a64b6a83235d7a696cb64ffdb984728095d6671c75d08a76ca9da82 +size 18467 diff --git a/completions/completions_00060.parquet b/completions/completions_00060.parquet new file mode 100644 index 0000000..0596b66 --- /dev/null +++ b/completions/completions_00060.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43bbe94fe454fdaabe37623b5874f1db600a4af3441ef19372dd7fe53c99779a +size 17625 diff --git a/completions/completions_00070.parquet b/completions/completions_00070.parquet new file mode 100644 index 0000000..ed49327 --- /dev/null +++ b/completions/completions_00070.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd384b2019d521ba630f03f2bee3e1e49c6eb06a42515a8e30035e4adfb6ccb3 +size 18213 diff --git a/completions/completions_00080.parquet b/completions/completions_00080.parquet new file mode 100644 index 0000000..83e81d7 --- /dev/null +++ b/completions/completions_00080.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10506d2efddf1d86bd715ebb49946dce52424cd65dc5aae299ad61175aedcd71 +size 17656 diff --git a/completions/completions_00090.parquet b/completions/completions_00090.parquet new file mode 100644 index 0000000..891c7c3 --- /dev/null +++ b/completions/completions_00090.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:825bbcc5d4407626a7e0d9491d88dc3b6d295d97683fef685271fd13f5a6255e +size 17777 diff --git a/completions/completions_00100.parquet b/completions/completions_00100.parquet new file mode 100644 index 0000000..ec504f3 --- /dev/null +++ b/completions/completions_00100.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e78f4a11d283af6d2756606f8813105fa7c05b5ec742d7efdaaf6acdba0555 +size 18693 diff --git a/completions/completions_00110.parquet b/completions/completions_00110.parquet new file mode 100644 index 0000000..a8fcc48 --- /dev/null +++ b/completions/completions_00110.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f42aa40f179a76d58bdf2347aea88c4912852e4008bc3702bf93f24137b633d +size 20772 diff --git a/completions/completions_00120.parquet b/completions/completions_00120.parquet new file mode 100644 index 0000000..14c7d94 --- /dev/null +++ b/completions/completions_00120.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9b46d51775c0a293958904f37f85ffb891df9f911f1cd0fa9db9b1b6fbfd309 +size 20236 diff --git a/completions/completions_00130.parquet b/completions/completions_00130.parquet new file mode 100644 index 0000000..8a60bc6 --- /dev/null +++ b/completions/completions_00130.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a779da6108c97f9f08244f94d37e0fb5270026920acfc52e4e4ea70a63d37b +size 19154 diff --git a/completions/completions_00140.parquet b/completions/completions_00140.parquet new file mode 100644 index 0000000..e6533a1 --- /dev/null +++ b/completions/completions_00140.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0119a211c3f20ca9ac836310bfce4df42d28220815582bb103b6e357269bc89 +size 16770 diff --git a/completions/completions_00150.parquet b/completions/completions_00150.parquet new file mode 100644 index 0000000..fd41b04 --- /dev/null +++ b/completions/completions_00150.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02798400369f039e2c191d03d4f1bf198bcbdc5ba50ec75a8669d4eb4e5be3b7 +size 18198 diff --git a/completions/completions_00160.parquet b/completions/completions_00160.parquet new file mode 100644 index 0000000..c18c34e --- /dev/null +++ b/completions/completions_00160.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33654a88093883856bfe5471775b9c29561227ed6a6fc7d25ee767a86960c337 +size 17214 diff --git a/completions/completions_00170.parquet b/completions/completions_00170.parquet new file mode 100644 index 0000000..55a622e --- /dev/null +++ b/completions/completions_00170.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9804d65dd7c72eb177925af42ed3f5068f323978a883c171576540d3316992a1 +size 17010 diff --git a/completions/completions_00180.parquet b/completions/completions_00180.parquet new file mode 100644 index 0000000..84ebbe0 --- /dev/null +++ b/completions/completions_00180.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d9b53e4f360f9ed44ac22efd2d492169ef67b00c34d90fa45df31d4062473d +size 18083 diff --git a/completions/completions_00190.parquet b/completions/completions_00190.parquet new file mode 100644 index 0000000..2502a9b --- /dev/null +++ b/completions/completions_00190.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc35641cd8df86333d08a95f0c9da9477a986b375054cb9215cd1e3ee620be89 +size 17747 diff --git a/completions/completions_00200.parquet b/completions/completions_00200.parquet new file mode 100644 index 0000000..e54be1b --- /dev/null +++ b/completions/completions_00200.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7c27cf14176a14ef5c3578c1d617049f1821b8f4004d5f253038b5a5c107f1 +size 15277 diff --git a/completions/completions_00210.parquet b/completions/completions_00210.parquet new file mode 100644 index 0000000..a7ce888 --- /dev/null +++ b/completions/completions_00210.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e1e3aa7227df76fd2d9d5b07e68cf591b725c201853e41478177d45ea32b79 +size 16705 diff --git a/completions/completions_00220.parquet b/completions/completions_00220.parquet new file mode 100644 index 0000000..db2b8a8 --- /dev/null +++ b/completions/completions_00220.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2e12a808f38569b03d8e0773fb80d2bd4641cef554fd0aa913592b9e031c7d +size 17824 diff --git a/completions/completions_00230.parquet b/completions/completions_00230.parquet new file mode 100644 index 0000000..5148e7d --- /dev/null +++ b/completions/completions_00230.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3d8f94d2b60ad86cb21cfbf9f5e41b81697accab9ab418c2a55d70d0de8a1c +size 17469 diff --git a/completions/completions_00240.parquet b/completions/completions_00240.parquet new file mode 100644 index 0000000..39a25cc --- /dev/null +++ b/completions/completions_00240.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711dc8506c6bd1cd8b245503e6346525655b1d6c3ba08d666d24b649dd72d22e +size 17930 diff --git a/completions/completions_00250.parquet b/completions/completions_00250.parquet new file mode 100644 index 0000000..1a45a3e --- /dev/null +++ b/completions/completions_00250.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b43606401ccc06c8fe10f06d51469a2ca203c6a2cef1e7259b91c71ec0ffab +size 17810 diff --git a/completions/completions_00260.parquet b/completions/completions_00260.parquet new file mode 100644 index 0000000..ce574ad --- /dev/null +++ b/completions/completions_00260.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88003fa5e5754092db19ca79aa5b767052c66736be63428b44a2695e1c6e0779 +size 17296 diff --git a/completions/completions_00270.parquet b/completions/completions_00270.parquet new file mode 100644 index 0000000..e274006 --- /dev/null +++ b/completions/completions_00270.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21798afe0325c99b58c2424791998d4e47c22f9e13bc0bee4a4a035d5d23ebe7 +size 16380 diff --git a/completions/completions_00280.parquet b/completions/completions_00280.parquet new file mode 100644 index 0000000..75197f8 --- /dev/null +++ b/completions/completions_00280.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bd8c0cd1389dde99f201477a3e77ef19d6f5ce6d842cffd5e61ab68138d9db4 +size 16891 diff --git a/completions/completions_00290.parquet b/completions/completions_00290.parquet new file mode 100644 index 0000000..65a8fbb --- /dev/null +++ b/completions/completions_00290.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06905d2100233c07140141138813e668a3c268a669d3118531c04643ac705377 +size 18454 diff --git a/completions/completions_00300.parquet b/completions/completions_00300.parquet new file mode 100644 index 0000000..131f38e --- /dev/null +++ b/completions/completions_00300.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db26bb5dafb73b8989834bdfd5d72ddfaa3d75dec5d51e446de6ce03d3497163 +size 17804 diff --git a/config.json b/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/chat_template.jinja b/last-checkpoint/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/last-checkpoint/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/last-checkpoint/config.json b/last-checkpoint/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/last-checkpoint/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/last-checkpoint/generation_config.json b/last-checkpoint/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/last-checkpoint/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/model.safetensors b/last-checkpoint/model.safetensors new file mode 100644 index 0000000..3a49bf6 --- /dev/null +++ b/last-checkpoint/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89a43560f87b842e22b16e9371ae166b13e2a7e87091cf48810e1cd6b62e9830 +size 6171927112 diff --git a/last-checkpoint/tokenizer.json b/last-checkpoint/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/last-checkpoint/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/last-checkpoint/tokenizer_config.json b/last-checkpoint/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/last-checkpoint/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/last-checkpoint/trainer_state.json b/last-checkpoint/trainer_state.json new file mode 100644 index 0000000..7e6f977 --- /dev/null +++ b/last-checkpoint/trainer_state.json @@ -0,0 +1,874 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5875, + "completions/max_length": 512.0, + "completions/max_terminated_length": 440.4, + "completions/mean_length": 468.6125, + "completions/mean_terminated_length": 379.02000122070314, + "completions/min_length": 369.0, + "completions/min_terminated_length": 317.8, + "entropy": 0.2740801006555557, + "epoch": 0.06666666666666667, + "frac_reward_zero_std": 0.15, + "grad_norm": 1.3671875, + "kl": 0.04017456619621953, + "learning_rate": 9.7e-06, + "loss": -0.004630821943283081, + "num_tokens": 50357.0, + "reward": 0.803729248046875, + "reward_std": 0.41876387000083926, + "rewards/JointRewardFunction/mean": 0.803729248046875, + "rewards/JointRewardFunction/std": 0.41876387894153594, + "step": 10, + "step_time": 23.422285906800244 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 512.0, + "completions/max_terminated_length": 328.2, + "completions/mean_length": 493.75, + "completions/mean_terminated_length": 310.2800018310547, + "completions/min_length": 446.2, + "completions/min_terminated_length": 292.6, + "entropy": 0.1757997965440154, + "epoch": 0.13333333333333333, + "frac_reward_zero_std": 0.65, + "grad_norm": 0.1865234375, + "kl": 0.064147645724006, + "learning_rate": 9.366666666666668e-06, + "loss": -0.00039904499426484107, + "num_tokens": 103617.0, + "reward": 0.85, + "reward_std": 0.3472102493047714, + "rewards/JointRewardFunction/mean": 0.85, + "rewards/JointRewardFunction/std": 0.3472102552652359, + "step": 20, + "step_time": 23.712279441399733 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6625, + "completions/max_length": 512.0, + "completions/max_terminated_length": 376.5, + "completions/mean_length": 476.5875, + "completions/mean_terminated_length": 333.94667358398436, + "completions/min_length": 374.9, + "completions/min_terminated_length": 272.5, + "entropy": 0.21253416435793043, + "epoch": 0.2, + "frac_reward_zero_std": 0.3, + "grad_norm": 2.28125, + "kl": 0.07378085452364758, + "learning_rate": 9.033333333333334e-06, + "loss": -0.012095230817794799, + "num_tokens": 153160.0, + "reward": 0.95, + "reward_std": 0.43348987102508546, + "rewards/JointRewardFunction/mean": 0.95, + "rewards/JointRewardFunction/std": 0.43348987102508546, + "step": 30, + "step_time": 23.327648415401928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.85, + "completions/max_length": 512.0, + "completions/max_terminated_length": 284.1, + "completions/mean_length": 503.7625, + "completions/mean_terminated_length": 278.8300018310547, + "completions/min_length": 477.8, + "completions/min_terminated_length": 273.0, + "entropy": 0.21520083369687198, + "epoch": 0.26666666666666666, + "frac_reward_zero_std": 0.35, + "grad_norm": 0.255859375, + "kl": 0.06080276914872229, + "learning_rate": 8.700000000000001e-06, + "loss": -0.0005816968623548746, + "num_tokens": 206773.0, + "reward": 0.7875, + "reward_std": 0.4616557478904724, + "rewards/JointRewardFunction/mean": 0.7875, + "rewards/JointRewardFunction/std": 0.46165576577186584, + "step": 40, + "step_time": 23.59478929130273 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.7625, + "completions/max_length": 512.0, + "completions/max_terminated_length": 325.7, + "completions/mean_length": 490.0375, + "completions/mean_terminated_length": 299.44667053222656, + "completions/min_length": 433.4, + "completions/min_terminated_length": 279.8, + "entropy": 0.25695387944579123, + "epoch": 0.3333333333333333, + "frac_reward_zero_std": 0.45, + "grad_norm": 1.6953125, + "kl": 0.08556670525576919, + "learning_rate": 8.366666666666667e-06, + "loss": -0.004827765375375747, + "num_tokens": 258692.0, + "reward": 0.9375, + "reward_std": 0.3981345325708389, + "rewards/JointRewardFunction/mean": 0.9375, + "rewards/JointRewardFunction/std": 0.3981345325708389, + "step": 50, + "step_time": 23.633761547702306 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 512.0, + "completions/max_terminated_length": 332.6, + "completions/mean_length": 492.95, + "completions/mean_terminated_length": 307.1166687011719, + "completions/min_length": 437.8, + "completions/min_terminated_length": 284.2, + "entropy": 0.2603289651684463, + "epoch": 0.4, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.671875, + "kl": 0.060612542228773235, + "learning_rate": 8.033333333333335e-06, + "loss": 0.0008684337139129638, + "num_tokens": 311528.0, + "reward": 0.925, + "reward_std": 0.2958350956439972, + "rewards/JointRewardFunction/mean": 0.925, + "rewards/JointRewardFunction/std": 0.29583510756492615, + "step": 60, + "step_time": 23.58587597120204 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.725, + "completions/max_length": 512.0, + "completions/max_terminated_length": 339.0, + "completions/mean_length": 492.8125, + "completions/mean_terminated_length": 313.85, + "completions/min_length": 433.5, + "completions/min_terminated_length": 279.9, + "entropy": 0.27547385785728695, + "epoch": 0.4666666666666667, + "frac_reward_zero_std": 0.6, + "grad_norm": 0.99609375, + "kl": 0.0518098235828802, + "learning_rate": 7.7e-06, + "loss": -0.0017207100987434386, + "num_tokens": 364749.0, + "reward": 0.9125, + "reward_std": 0.3383516758680344, + "rewards/JointRewardFunction/mean": 0.9125, + "rewards/JointRewardFunction/std": 0.3383516877889633, + "step": 70, + "step_time": 23.567738218297016 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.7375, + "completions/max_length": 512.0, + "completions/max_terminated_length": 449.5, + "completions/mean_length": 487.675, + "completions/mean_terminated_length": 422.1333343505859, + "completions/min_length": 393.1, + "completions/min_terminated_length": 393.1, + "entropy": 0.2849867718294263, + "epoch": 0.5333333333333333, + "frac_reward_zero_std": 0.4, + "grad_norm": 0.042724609375, + "kl": 0.054288532945793125, + "learning_rate": 7.3666666666666676e-06, + "loss": 0.01214314103126526, + "num_tokens": 417435.0, + "reward": 0.9625, + "reward_std": 0.44988160133361815, + "rewards/JointRewardFunction/mean": 0.9625, + "rewards/JointRewardFunction/std": 0.4498816192150116, + "step": 80, + "step_time": 23.797617995494512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.7125, + "completions/max_length": 512.0, + "completions/max_terminated_length": 330.2, + "completions/mean_length": 487.5125, + "completions/mean_terminated_length": 302.90834045410156, + "completions/min_length": 421.4, + "completions/min_terminated_length": 267.8, + "entropy": 0.2951860463246703, + "epoch": 0.6, + "frac_reward_zero_std": 0.45, + "grad_norm": 1.265625, + "kl": 0.04278905827086419, + "learning_rate": 7.033333333333334e-06, + "loss": -0.0006712859496474266, + "num_tokens": 470584.0, + "reward": 0.9375, + "reward_std": 0.31479085683822633, + "rewards/JointRewardFunction/mean": 0.9375, + "rewards/JointRewardFunction/std": 0.3147908627986908, + "step": 90, + "step_time": 23.514957720592793 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 512.0, + "completions/max_terminated_length": 459.9, + "completions/mean_length": 472.3125, + "completions/mean_terminated_length": 418.8316680908203, + "completions/min_length": 365.9, + "completions/min_terminated_length": 365.9, + "entropy": 0.37076061628758905, + "epoch": 0.6666666666666666, + "frac_reward_zero_std": 0.4, + "grad_norm": 2.15625, + "kl": 0.05282424986362457, + "learning_rate": 6.700000000000001e-06, + "loss": -0.009071560204029083, + "num_tokens": 523253.0, + "reward": 0.9815673828125, + "reward_std": 0.4160596996545792, + "rewards/JointRewardFunction/mean": 0.9815673828125, + "rewards/JointRewardFunction/std": 0.41605971157550814, + "step": 100, + "step_time": 23.664498128004197 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6125, + "completions/max_length": 509.7, + "completions/max_terminated_length": 484.8, + "completions/mean_length": 475.7, + "completions/mean_terminated_length": 437.1983367919922, + "completions/min_length": 383.8, + "completions/min_terminated_length": 383.8, + "entropy": 0.4262677112594247, + "epoch": 0.7333333333333333, + "frac_reward_zero_std": 0.4, + "grad_norm": 2.0625, + "kl": 0.03714689936023206, + "learning_rate": 6.366666666666668e-06, + "loss": 0.008247312903404237, + "num_tokens": 574257.0, + "reward": 1.037060546875, + "reward_std": 0.3255626171827316, + "rewards/JointRewardFunction/mean": 1.037060546875, + "rewards/JointRewardFunction/std": 0.3255626350641251, + "step": 110, + "step_time": 23.18678354880103 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.575, + "completions/max_length": 512.0, + "completions/max_terminated_length": 388.5, + "completions/mean_length": 472.7625, + "completions/mean_terminated_length": 342.3550079345703, + "completions/min_length": 396.3, + "completions/min_terminated_length": 293.9, + "entropy": 0.4861995566636324, + "epoch": 0.8, + "frac_reward_zero_std": 0.35, + "grad_norm": 1.40625, + "kl": 0.039038634288590404, + "learning_rate": 6.033333333333335e-06, + "loss": 0.012050890922546386, + "num_tokens": 624342.0, + "reward": 0.925, + "reward_std": 0.4410230278968811, + "rewards/JointRewardFunction/mean": 0.925, + "rewards/JointRewardFunction/std": 0.441023051738739, + "step": 120, + "step_time": 23.72601753709896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5375, + "completions/max_length": 511.6, + "completions/max_terminated_length": 476.8, + "completions/mean_length": 470.0375, + "completions/mean_terminated_length": 427.0658416748047, + "completions/min_length": 372.0, + "completions/min_terminated_length": 372.0, + "entropy": 0.41965660247951747, + "epoch": 0.8666666666666667, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.4453125, + "kl": 0.03558391091646627, + "learning_rate": 5.7e-06, + "loss": 0.0031338028609752657, + "num_tokens": 676197.0, + "reward": 1.029736328125, + "reward_std": 0.3752464294433594, + "rewards/JointRewardFunction/mean": 1.029736328125, + "rewards/JointRewardFunction/std": 0.37524643540382385, + "step": 130, + "step_time": 23.592583017596915 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 512.0, + "completions/max_terminated_length": 470.4, + "completions/mean_length": 468.3375, + "completions/mean_terminated_length": 422.97833862304685, + "completions/min_length": 369.0, + "completions/min_terminated_length": 369.0, + "entropy": 0.4238532094284892, + "epoch": 0.9333333333333333, + "frac_reward_zero_std": 0.4, + "grad_norm": 0.048095703125, + "kl": 0.047195866727270185, + "learning_rate": 5.366666666666666e-06, + "loss": 0.011741240322589875, + "num_tokens": 726296.0, + "reward": 1.0271484375, + "reward_std": 0.39577038288116456, + "rewards/JointRewardFunction/mean": 1.0271484375, + "rewards/JointRewardFunction/std": 0.39577039480209353, + "step": 140, + "step_time": 23.827525900093313 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.475, + "completions/max_length": 507.6, + "completions/max_terminated_length": 417.3, + "completions/mean_length": 452.0375, + "completions/mean_terminated_length": 360.2283416748047, + "completions/min_length": 351.9, + "completions/min_terminated_length": 300.7, + "entropy": 0.405802302993834, + "epoch": 1.0, + "frac_reward_zero_std": 0.5, + "grad_norm": 1.3125, + "kl": 0.05754059529863298, + "learning_rate": 5.033333333333333e-06, + "loss": 0.020150861144065856, + "num_tokens": 774771.0, + "reward": 1.0125, + "reward_std": 0.37200968265533446, + "rewards/JointRewardFunction/mean": 1.0125, + "rewards/JointRewardFunction/std": 0.3720097005367279, + "step": 150, + "step_time": 23.477148605001275 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.35, + "completions/max_length": 504.7, + "completions/max_terminated_length": 460.6, + "completions/mean_length": 418.725, + "completions/mean_terminated_length": 376.1097686767578, + "completions/min_length": 290.2, + "completions/min_terminated_length": 290.2, + "entropy": 0.4137630261480808, + "epoch": 1.0666666666666667, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.5234375, + "kl": 0.04866745978360996, + "learning_rate": 4.7e-06, + "loss": -0.012348555028438568, + "num_tokens": 820185.0, + "reward": 1.1374755859375, + "reward_std": 0.2693489044904709, + "rewards/JointRewardFunction/mean": 1.1374755859375, + "rewards/JointRewardFunction/std": 0.2693489044904709, + "step": 160, + "step_time": 23.104779590805993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.45, + "completions/max_length": 512.0, + "completions/max_terminated_length": 459.3, + "completions/mean_length": 452.575, + "completions/mean_terminated_length": 405.1588165283203, + "completions/min_length": 342.3, + "completions/min_terminated_length": 342.3, + "entropy": 0.3708974776789546, + "epoch": 1.1333333333333333, + "frac_reward_zero_std": 0.8, + "grad_norm": 0.032470703125, + "kl": 0.05273645754205063, + "learning_rate": 4.366666666666667e-06, + "loss": 0.0020671430975198746, + "num_tokens": 868979.0, + "reward": 1.2064208984375, + "reward_std": 0.1077505886554718, + "rewards/JointRewardFunction/mean": 1.2064208984375, + "rewards/JointRewardFunction/std": 0.10775059163570404, + "step": 170, + "step_time": 23.706487477812335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3875, + "completions/max_length": 512.0, + "completions/max_terminated_length": 481.1, + "completions/mean_length": 445.025, + "completions/mean_terminated_length": 411.8959655761719, + "completions/min_length": 341.8, + "completions/min_terminated_length": 341.8, + "entropy": 0.3909618055447936, + "epoch": 1.2, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.7109375, + "kl": 0.045815252687316385, + "learning_rate": 4.033333333333333e-06, + "loss": 0.004306042194366455, + "num_tokens": 917385.0, + "reward": 1.075, + "reward_std": 0.3337466180324554, + "rewards/JointRewardFunction/mean": 1.075, + "rewards/JointRewardFunction/std": 0.3337466180324554, + "step": 180, + "step_time": 23.554991897323635 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6125, + "completions/max_length": 512.0, + "completions/max_terminated_length": 471.2, + "completions/mean_length": 468.075, + "completions/mean_terminated_length": 422.6735778808594, + "completions/min_length": 385.9, + "completions/min_terminated_length": 385.9, + "entropy": 0.36821637134999036, + "epoch": 1.2666666666666666, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.8671875, + "kl": 0.04221574537805282, + "learning_rate": 3.7e-06, + "loss": 0.005308620631694794, + "num_tokens": 968751.0, + "reward": 0.95, + "reward_std": 0.3438155859708786, + "rewards/JointRewardFunction/mean": 0.95, + "rewards/JointRewardFunction/std": 0.34381560385227206, + "step": 190, + "step_time": 23.511522994117694 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 497.9, + "completions/max_terminated_length": 470.5, + "completions/mean_length": 431.05, + "completions/mean_terminated_length": 400.3317901611328, + "completions/min_length": 318.5, + "completions/min_terminated_length": 318.5, + "entropy": 0.41096227150410414, + "epoch": 1.3333333333333333, + "frac_reward_zero_std": 0.65, + "grad_norm": 0.06640625, + "kl": 0.04462224093731493, + "learning_rate": 3.366666666666667e-06, + "loss": 0.0076727248728275296, + "num_tokens": 1016343.0, + "reward": 1.125, + "reward_std": 0.21674493551254273, + "rewards/JointRewardFunction/mean": 1.125, + "rewards/JointRewardFunction/std": 0.21674493551254273, + "step": 200, + "step_time": 23.060737183006133 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3875, + "completions/max_length": 512.0, + "completions/max_terminated_length": 482.7, + "completions/mean_length": 458.65, + "completions/mean_terminated_length": 428.9585815429688, + "completions/min_length": 363.5, + "completions/min_terminated_length": 363.5, + "entropy": 0.3947587950155139, + "epoch": 1.4, + "frac_reward_zero_std": 0.65, + "grad_norm": 9.3125, + "kl": 0.10990666588768364, + "learning_rate": 3.0333333333333337e-06, + "loss": 0.010126692801713943, + "num_tokens": 1066311.0, + "reward": 1.111083984375, + "reward_std": 0.28541127145290374, + "rewards/JointRewardFunction/mean": 1.111083984375, + "rewards/JointRewardFunction/std": 0.2854112803936005, + "step": 210, + "step_time": 23.734341114398557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 511.2, + "completions/max_terminated_length": 499.2, + "completions/mean_length": 448.1, + "completions/mean_terminated_length": 415.37428894042966, + "completions/min_length": 327.8, + "completions/min_terminated_length": 327.8, + "entropy": 0.4135129824280739, + "epoch": 1.4666666666666668, + "frac_reward_zero_std": 0.4, + "grad_norm": 2.0, + "kl": 0.04890955399023369, + "learning_rate": 2.7000000000000004e-06, + "loss": 0.005412362515926361, + "num_tokens": 1115131.0, + "reward": 1.0, + "reward_std": 0.407365021109581, + "rewards/JointRewardFunction/mean": 1.0, + "rewards/JointRewardFunction/std": 0.4073650389909744, + "step": 220, + "step_time": 23.674485975824062 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3, + "completions/max_length": 493.0, + "completions/max_terminated_length": 468.3, + "completions/mean_length": 425.175, + "completions/mean_terminated_length": 400.7392883300781, + "completions/min_length": 313.4, + "completions/min_terminated_length": 313.4, + "entropy": 0.40025499686598776, + "epoch": 1.5333333333333332, + "frac_reward_zero_std": 0.6, + "grad_norm": 0.045166015625, + "kl": 0.044459241011645646, + "learning_rate": 2.3666666666666667e-06, + "loss": 0.0074857622385025024, + "num_tokens": 1162029.0, + "reward": 1.0625, + "reward_std": 0.332049286365509, + "rewards/JointRewardFunction/mean": 1.0625, + "rewards/JointRewardFunction/std": 0.3320492923259735, + "step": 230, + "step_time": 22.975970137718832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5125, + "completions/max_length": 507.5, + "completions/max_terminated_length": 417.0, + "completions/mean_length": 443.7875, + "completions/mean_terminated_length": 338.87250366210935, + "completions/min_length": 315.1, + "completions/min_terminated_length": 263.9, + "entropy": 0.3852341592311859, + "epoch": 1.6, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.109375, + "kl": 0.039597276959102604, + "learning_rate": 2.0333333333333335e-06, + "loss": 0.015155516564846039, + "num_tokens": 1211952.0, + "reward": 1.03134765625, + "reward_std": 0.30796128809452056, + "rewards/JointRewardFunction/mean": 1.03134765625, + "rewards/JointRewardFunction/std": 0.30796128809452056, + "step": 240, + "step_time": 23.477432061507717 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4125, + "completions/max_length": 499.5, + "completions/max_terminated_length": 454.8, + "completions/mean_length": 440.175, + "completions/mean_terminated_length": 402.6975067138672, + "completions/min_length": 342.6, + "completions/min_terminated_length": 342.6, + "entropy": 0.4049293929710984, + "epoch": 1.6666666666666665, + "frac_reward_zero_std": 0.55, + "grad_norm": 0.06689453125, + "kl": 0.057483326562214644, + "learning_rate": 1.7000000000000002e-06, + "loss": 0.005497528612613678, + "num_tokens": 1260194.0, + "reward": 1.100634765625, + "reward_std": 0.30466278195381163, + "rewards/JointRewardFunction/mean": 1.100634765625, + "rewards/JointRewardFunction/std": 0.30466278940439223, + "step": 250, + "step_time": 23.31596095281129 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4125, + "completions/max_length": 512.0, + "completions/max_terminated_length": 486.0, + "completions/mean_length": 455.5875, + "completions/mean_terminated_length": 423.4404815673828, + "completions/min_length": 368.3, + "completions/min_terminated_length": 368.3, + "entropy": 0.37657185792922976, + "epoch": 1.7333333333333334, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.78125, + "kl": 0.044227164250332865, + "learning_rate": 1.3666666666666668e-06, + "loss": 0.01007101833820343, + "num_tokens": 1312481.0, + "reward": 1.0563232421875, + "reward_std": 0.3230230301618576, + "rewards/JointRewardFunction/mean": 1.0563232421875, + "rewards/JointRewardFunction/std": 0.3230230316519737, + "step": 260, + "step_time": 23.83616074830352 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 511.2, + "completions/max_terminated_length": 478.9, + "completions/mean_length": 419.7, + "completions/mean_terminated_length": 390.71238708496094, + "completions/min_length": 292.4, + "completions/min_terminated_length": 292.4, + "entropy": 0.3890227179042995, + "epoch": 1.8, + "frac_reward_zero_std": 0.85, + "grad_norm": 0.0517578125, + "kl": 0.04973521351348609, + "learning_rate": 1.0333333333333333e-06, + "loss": 0.00801372081041336, + "num_tokens": 1358297.0, + "reward": 1.1375, + "reward_std": 0.12793734967708587, + "rewards/JointRewardFunction/mean": 1.1375, + "rewards/JointRewardFunction/std": 0.12793734967708587, + "step": 270, + "step_time": 23.87891690217657 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.45, + "completions/max_length": 512.0, + "completions/max_terminated_length": 464.2, + "completions/mean_length": 440.9125, + "completions/mean_terminated_length": 394.0228607177734, + "completions/min_length": 329.2, + "completions/min_terminated_length": 329.2, + "entropy": 0.3685274325311184, + "epoch": 1.8666666666666667, + "frac_reward_zero_std": 0.6, + "grad_norm": 0.134765625, + "kl": 0.05201944473665208, + "learning_rate": 7.000000000000001e-07, + "loss": 0.013003082573413849, + "num_tokens": 1406762.0, + "reward": 1.123974609375, + "reward_std": 0.25532945692539216, + "rewards/JointRewardFunction/mean": 1.123974609375, + "rewards/JointRewardFunction/std": 0.25532945692539216, + "step": 280, + "step_time": 23.774091701293948 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4125, + "completions/max_length": 508.9, + "completions/max_terminated_length": 447.5, + "completions/mean_length": 443.3125, + "completions/mean_terminated_length": 385.1429840087891, + "completions/min_length": 311.9, + "completions/min_terminated_length": 311.9, + "entropy": 0.3619745412841439, + "epoch": 1.9333333333333333, + "frac_reward_zero_std": 0.55, + "grad_norm": 1.296875, + "kl": 0.04537691879086196, + "learning_rate": 3.666666666666667e-07, + "loss": 0.015167883038520813, + "num_tokens": 1456063.0, + "reward": 1.1, + "reward_std": 0.2966939479112625, + "rewards/JointRewardFunction/mean": 1.1, + "rewards/JointRewardFunction/std": 0.296693953871727, + "step": 290, + "step_time": 23.711561490091846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3625, + "completions/max_length": 511.4, + "completions/max_terminated_length": 471.0, + "completions/mean_length": 445.95, + "completions/mean_terminated_length": 418.78345947265626, + "completions/min_length": 355.0, + "completions/min_terminated_length": 355.0, + "entropy": 0.3853150447830558, + "epoch": 2.0, + "frac_reward_zero_std": 0.6, + "grad_norm": 1.8515625, + "kl": 0.04785980319138616, + "learning_rate": 3.333333333333334e-08, + "loss": -0.0016140155494213104, + "num_tokens": 1504095.0, + "reward": 1.1375, + "reward_std": 0.2394672751426697, + "rewards/JointRewardFunction/mean": 1.1375, + "rewards/JointRewardFunction/std": 0.23946728110313414, + "step": 300, + "step_time": 23.609792864409975 + } + ], + "logging_steps": 10, + "max_steps": 300, + "num_input_tokens_seen": 1504095, + "num_train_epochs": 2, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/last-checkpoint/training_args.bin b/last-checkpoint/training_args.bin new file mode 100644 index 0000000..12c8b99 --- /dev/null +++ b/last-checkpoint/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce376f35c622be385d16371a211e21a08a548f37ed9b16106324741412ca461 +size 7249 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..3a49bf6 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89a43560f87b842e22b16e9371ae166b13e2a7e87091cf48810e1cd6b62e9830 +size 6171927112 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..12c8b99 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce376f35c622be385d16371a211e21a08a548f37ed9b16106324741412ca461 +size 7249