commit 9c46ef56a14f928d95e87eddf57b23780b4c4068 Author: ModelHub XC Date: Tue Jun 16 07:51:18 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: harsha070/expfinal-qwen-island-s42-lambda-0p25 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new 
file mode 100644 index 0000000..794a3d3 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +--- +base_model: Qwen/Qwen2.5-3B-Instruct +library_name: transformers +model_name: expfinal-qwen-island-s42-lambda-0p25 +tags: +- generated_from_trainer +- grpo +- trl +licence: license +--- + +# Model Card for expfinal-qwen-island-s42-lambda-0p25 + +This model is a fine-tuned version of [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="harsha070/expfinal-qwen-island-s42-lambda-0p25", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/models-self5933/obfuscation-early-warning/runs/oslo2mig) + + + +This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300). + +### Framework versions + +- TRL: 1.3.0 +- Transformers: 5.7.0 +- Pytorch: 2.11.0 +- Datasets: 4.8.5 +- Tokenizers: 0.22.2 + +## Citations + +Cite GRPO as: + +```bibtex +@article{shao2024deepseekmath, + title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}}, + author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. 
Wu and Daya Guo}, + year = 2024, + eprint = {arXiv:2402.03300}, +} +``` + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud.
You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n</tool_call>' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- message.content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/completions/completions_00010.parquet b/completions/completions_00010.parquet new file mode 100644 index 0000000..297e35f --- /dev/null +++ b/completions/completions_00010.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90746dc10efad8addc3f8068be1a52105b200ac90f544ed8768d771ff2edc3ba +size 21472 diff --git a/completions/completions_00020.parquet b/completions/completions_00020.parquet new file mode 100644 index 0000000..d5f8b9c --- /dev/null +++ b/completions/completions_00020.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dab3fb723e89f1843a041e4c7bd05429a1f2aaf62ff805f06c7b87f08c64cae +size 24190 diff --git a/completions/completions_00030.parquet
b/completions/completions_00030.parquet new file mode 100644 index 0000000..efc24d8 --- /dev/null +++ b/completions/completions_00030.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cfdf53f9d2c21aed2d68cd21f756e6ea83a9fecd0275760cc25bfb07d436efb +size 24854 diff --git a/completions/completions_00040.parquet b/completions/completions_00040.parquet new file mode 100644 index 0000000..e2df626 --- /dev/null +++ b/completions/completions_00040.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6062b7c4e8481ba5e8de19ddf00d36ce1a5606158abb8132c7a79309eedcf59f +size 17061 diff --git a/completions/completions_00050.parquet b/completions/completions_00050.parquet new file mode 100644 index 0000000..d29ebb7 --- /dev/null +++ b/completions/completions_00050.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f14607160dea59eb3fb439f40f9a61e1dbd25af4bf911fd7c46304b95052ce12 +size 21288 diff --git a/completions/completions_00060.parquet b/completions/completions_00060.parquet new file mode 100644 index 0000000..bc34c91 --- /dev/null +++ b/completions/completions_00060.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf8ad640c1fab1b529c835e5382ed7bd266ec689dda1bb20b8fdd389014f7a8 +size 23016 diff --git a/completions/completions_00070.parquet b/completions/completions_00070.parquet new file mode 100644 index 0000000..3016067 --- /dev/null +++ b/completions/completions_00070.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f95e09fcec7e65dc3518060aefd9a78e07f8e3a8a41743029fdadaf54b6a746 +size 24487 diff --git a/completions/completions_00080.parquet b/completions/completions_00080.parquet new file mode 100644 index 0000000..d68623d --- /dev/null +++ b/completions/completions_00080.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d20de237cb640a8a30e5da91ec19770ab6c2f0f5f022bbc991bc184a44286e38 +size 
23409 diff --git a/completions/completions_00090.parquet b/completions/completions_00090.parquet new file mode 100644 index 0000000..f7cb2d2 --- /dev/null +++ b/completions/completions_00090.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38801b68c0eee119430916ae4ead76b5d0a52af0c80170e66e9f80fbdf98e8d +size 27581 diff --git a/completions/completions_00100.parquet b/completions/completions_00100.parquet new file mode 100644 index 0000000..a753094 --- /dev/null +++ b/completions/completions_00100.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c708abefc5038b15408288912046bfd90648264e36037ee56a342556ff72295 +size 16192 diff --git a/completions/completions_00110.parquet b/completions/completions_00110.parquet new file mode 100644 index 0000000..c2904bd --- /dev/null +++ b/completions/completions_00110.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834cc79feb803c433f42e81f88889661312dd9e953468433688f90c08b2cd9ee +size 17989 diff --git a/completions/completions_00120.parquet b/completions/completions_00120.parquet new file mode 100644 index 0000000..9af3766 --- /dev/null +++ b/completions/completions_00120.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f810f23fc3f750fa6af6cbf1e00381cc51ed54ea9e9faa7ed9ca3f282d940c0 +size 23666 diff --git a/completions/completions_00130.parquet b/completions/completions_00130.parquet new file mode 100644 index 0000000..56ca273 --- /dev/null +++ b/completions/completions_00130.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:911d91c7fbed6d2045c56b5ad34e044508f179d10ad1b625433336c3f40d28eb +size 19963 diff --git a/completions/completions_00140.parquet b/completions/completions_00140.parquet new file mode 100644 index 0000000..f33143c --- /dev/null +++ b/completions/completions_00140.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3e9000142a9980284b43fbf9cb06f1f98d34203eef103b3deaddcbd958712d64 +size 16591 diff --git a/completions/completions_00150.parquet b/completions/completions_00150.parquet new file mode 100644 index 0000000..643d5f5 --- /dev/null +++ b/completions/completions_00150.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a707c4328fd900bb3fcf44596812c96542adfcf55f71cb5b353e34b17ab7be08 +size 19950 diff --git a/completions/completions_00160.parquet b/completions/completions_00160.parquet new file mode 100644 index 0000000..11b00f3 --- /dev/null +++ b/completions/completions_00160.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b039721fd9587b351f76d30125d77749090796806017510b5b0afe61d165d79b +size 21524 diff --git a/config.json b/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + 
"num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/chat_template.jinja b/last-checkpoint/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/last-checkpoint/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud.
You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n</tool_call>' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- message.content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/last-checkpoint/config.json b/last-checkpoint/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/last-checkpoint/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", +
"full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/last-checkpoint/generation_config.json b/last-checkpoint/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/last-checkpoint/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/model.safetensors b/last-checkpoint/model.safetensors new file mode 100644 index 0000000..b691159 --- /dev/null +++ b/last-checkpoint/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2a6ee09b8a2b9681cd30c70233f3c0ffd05af9b38f65aad3f92a2b5999f904 +size 6171927112 diff --git a/last-checkpoint/tokenizer.json b/last-checkpoint/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/last-checkpoint/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git 
a/last-checkpoint/tokenizer_config.json b/last-checkpoint/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/last-checkpoint/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/last-checkpoint/trainer_state.json b/last-checkpoint/trainer_state.json new file mode 100644 index 0000000..ea16696 --- /dev/null +++ b/last-checkpoint/trainer_state.json @@ -0,0 +1,482 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.28, + "eval_steps": 500, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 959.3, + "completions/max_terminated_length": 883.0, + "completions/mean_length": 628.5875, + "completions/mean_terminated_length": 604.4089508056641, + "completions/min_length": 324.2, + "completions/min_terminated_length": 324.2, + "entropy": 0.8960087668150664, + "epoch": 0.08, + "frac_reward_zero_std": 0.225, + "grad_norm": 1.734375, + "kl": 0.04410275101472507, + "learning_rate": 9.4375e-06, + "loss": -0.004480601102113724, + 
"num_tokens": 70931.0, + "reward": 0.637615966796875, + "reward_std": 0.4684956520795822, + "rewards/JointRewardFunction/mean": 0.637615966796875, + "rewards/JointRewardFunction/std": 0.4684956640005112, + "step": 10, + "step_time": 39.2735027824996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1375, + "completions/max_length": 943.7, + "completions/max_terminated_length": 829.8, + "completions/mean_length": 631.4625, + "completions/mean_terminated_length": 579.1692932128906, + "completions/min_length": 324.2, + "completions/min_terminated_length": 324.2, + "entropy": 1.0276442520320415, + "epoch": 0.16, + "frac_reward_zero_std": 0.25, + "grad_norm": 1.703125, + "kl": 0.08849158070515842, + "learning_rate": 8.8125e-06, + "loss": 0.01496470272541046, + "num_tokens": 142092.0, + "reward": 0.539324951171875, + "reward_std": 0.49008582532405853, + "rewards/JointRewardFunction/mean": 0.539324951171875, + "rewards/JointRewardFunction/std": 0.4900858402252197, + "step": 20, + "step_time": 40.54311619299951 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0125, + "completions/max_length": 863.4, + "completions/max_terminated_length": 835.9, + "completions/mean_length": 554.5875, + "completions/mean_terminated_length": 548.3660766601563, + "completions/min_length": 307.3, + "completions/min_terminated_length": 307.3, + "entropy": 1.1838637091219426, + "epoch": 0.24, + "frac_reward_zero_std": 0.1, + "grad_norm": 3.046875, + "kl": 0.11827877229079604, + "learning_rate": 8.1875e-06, + "loss": 0.009982097893953323, + "num_tokens": 206903.0, + "reward": 0.6125, + "reward_std": 0.4704344987869263, + "rewards/JointRewardFunction/mean": 0.6125, + "rewards/JointRewardFunction/std": 
0.47043450474739074, + "step": 30, + "step_time": 37.010521245800916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.025, + "completions/max_length": 742.1, + "completions/max_terminated_length": 659.0, + "completions/mean_length": 425.7, + "completions/mean_terminated_length": 410.8892883300781, + "completions/min_length": 219.2, + "completions/min_terminated_length": 219.2, + "entropy": 1.2652597405016421, + "epoch": 0.32, + "frac_reward_zero_std": 0.15, + "grad_norm": 6.125, + "kl": 0.3052154924720526, + "learning_rate": 7.5625e-06, + "loss": 0.027592796087265014, + "num_tokens": 261533.0, + "reward": 0.61771240234375, + "reward_std": 0.46430147290229795, + "rewards/JointRewardFunction/mean": 0.61771240234375, + "rewards/JointRewardFunction/std": 0.46430149376392366, + "step": 40, + "step_time": 32.063390286398864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0875, + "completions/max_length": 919.3, + "completions/max_terminated_length": 713.2, + "completions/mean_length": 511.4125, + "completions/mean_terminated_length": 464.44822998046874, + "completions/min_length": 244.6, + "completions/min_terminated_length": 244.6, + "entropy": 1.3154176332056522, + "epoch": 0.4, + "frac_reward_zero_std": 0.1, + "grad_norm": 3.6875, + "kl": 0.24474074998870493, + "learning_rate": 6.9375e-06, + "loss": 0.001818625070154667, + "num_tokens": 323276.0, + "reward": 0.537213134765625, + "reward_std": 0.5033262223005295, + "rewards/JointRewardFunction/mean": 0.537213134765625, + "rewards/JointRewardFunction/std": 0.5033262312412262, + "step": 50, + "step_time": 39.09056931000159 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + 
"clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 824.5, + "completions/max_terminated_length": 711.6, + "completions/mean_length": 520.4625, + "completions/mean_terminated_length": 484.48512268066406, + "completions/min_length": 261.1, + "completions/min_terminated_length": 261.1, + "entropy": 1.2718341693282127, + "epoch": 0.48, + "frac_reward_zero_std": 0.225, + "grad_norm": 4.6875, + "kl": 0.2580411507748067, + "learning_rate": 6.3125e-06, + "loss": 0.014349016547203063, + "num_tokens": 385253.0, + "reward": 0.54111328125, + "reward_std": 0.4758811920881271, + "rewards/JointRewardFunction/mean": 0.54111328125, + "rewards/JointRewardFunction/std": 0.47588120102882386, + "step": 60, + "step_time": 35.274126689498736 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 943.6, + "completions/max_terminated_length": 808.7, + "completions/mean_length": 537.75, + "completions/mean_terminated_length": 502.15358276367186, + "completions/min_length": 267.1, + "completions/min_terminated_length": 267.1, + "entropy": 1.244665590673685, + "epoch": 0.56, + "frac_reward_zero_std": 0.175, + "grad_norm": 4.0625, + "kl": 0.24244523425586523, + "learning_rate": 5.6875e-06, + "loss": 0.0023317448794841766, + "num_tokens": 448729.0, + "reward": 0.69791259765625, + "reward_std": 0.4654367908835411, + "rewards/JointRewardFunction/mean": 0.69791259765625, + "rewards/JointRewardFunction/std": 0.4654367953538895, + "step": 70, + "step_time": 40.008808337500525 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05, + "completions/max_length": 810.1, + "completions/max_terminated_length": 720.2, + 
"completions/mean_length": 534.1625, + "completions/mean_terminated_length": 508.99286193847655, + "completions/min_length": 286.4, + "completions/min_terminated_length": 286.4, + "entropy": 1.2685907267034053, + "epoch": 0.64, + "frac_reward_zero_std": 0.225, + "grad_norm": 4.125, + "kl": 0.2162147051654756, + "learning_rate": 5.0625e-06, + "loss": 0.014069165289402007, + "num_tokens": 512076.0, + "reward": 0.728045654296875, + "reward_std": 0.5096077308058738, + "rewards/JointRewardFunction/mean": 0.728045654296875, + "rewards/JointRewardFunction/std": 0.5096077516674995, + "step": 80, + "step_time": 34.66793936170143 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.025, + "completions/max_length": 793.3, + "completions/max_terminated_length": 733.4, + "completions/mean_length": 492.2, + "completions/mean_terminated_length": 478.558935546875, + "completions/min_length": 245.4, + "completions/min_terminated_length": 245.4, + "entropy": 1.24496211335063, + "epoch": 0.72, + "frac_reward_zero_std": 0.175, + "grad_norm": 4.03125, + "kl": 0.24482853645458819, + "learning_rate": 4.4375e-06, + "loss": 0.032863426208496097, + "num_tokens": 572072.0, + "reward": 0.64200439453125, + "reward_std": 0.5001831084489823, + "rewards/JointRewardFunction/mean": 0.64200439453125, + "rewards/JointRewardFunction/std": 0.5001831203699112, + "step": 90, + "step_time": 34.359502547597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 768.8, + "completions/max_terminated_length": 768.8, + "completions/mean_length": 475.65, + "completions/mean_terminated_length": 475.65, + "completions/min_length": 258.8, + "completions/min_terminated_length": 258.8, + "entropy": 
1.330046895891428, + "epoch": 0.8, + "frac_reward_zero_std": 0.175, + "grad_norm": 3.375, + "kl": 0.24854949009604752, + "learning_rate": 3.8125e-06, + "loss": 0.0744681715965271, + "num_tokens": 630790.0, + "reward": 0.725848388671875, + "reward_std": 0.4486017137765884, + "rewards/JointRewardFunction/mean": 0.725848388671875, + "rewards/JointRewardFunction/std": 0.4486017107963562, + "step": 100, + "step_time": 33.44759687739897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0125, + "completions/max_length": 695.2, + "completions/max_terminated_length": 669.5, + "completions/mean_length": 443.0125, + "completions/mean_terminated_length": 436.7017883300781, + "completions/min_length": 213.1, + "completions/min_terminated_length": 213.1, + "entropy": 1.3013170935213565, + "epoch": 0.88, + "frac_reward_zero_std": 0.275, + "grad_norm": 3.984375, + "kl": 0.3150750307366252, + "learning_rate": 3.1875e-06, + "loss": 0.020898757874965666, + "num_tokens": 686775.0, + "reward": 0.71988525390625, + "reward_std": 0.515497374534607, + "rewards/JointRewardFunction/mean": 0.71988525390625, + "rewards/JointRewardFunction/std": 0.515497374534607, + "step": 110, + "step_time": 30.408145976099696 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.025, + "completions/max_length": 757.0, + "completions/max_terminated_length": 667.6, + "completions/mean_length": 428.675, + "completions/mean_terminated_length": 412.0500061035156, + "completions/min_length": 201.6, + "completions/min_terminated_length": 201.6, + "entropy": 1.2176271453499794, + "epoch": 0.96, + "frac_reward_zero_std": 0.175, + "grad_norm": 6.53125, + "kl": 0.29011352979578076, + "learning_rate": 2.5625e-06, + "loss": 
0.023420125246047974, + "num_tokens": 741441.0, + "reward": 0.738653564453125, + "reward_std": 0.45226994156837463, + "rewards/JointRewardFunction/mean": 0.738653564453125, + "rewards/JointRewardFunction/std": 0.45226994901895523, + "step": 120, + "step_time": 32.81503715240033 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.8, + "completions/max_terminated_length": 645.8, + "completions/mean_length": 429.1875, + "completions/mean_terminated_length": 429.1875, + "completions/min_length": 218.8, + "completions/min_terminated_length": 218.8, + "entropy": 1.313951000571251, + "epoch": 1.04, + "frac_reward_zero_std": 0.3, + "grad_norm": 3.78125, + "kl": 0.3095056655351073, + "learning_rate": 1.9375e-06, + "loss": 0.031660494208335874, + "num_tokens": 796394.0, + "reward": 0.7450439453125, + "reward_std": 0.46799357831478117, + "rewards/JointRewardFunction/mean": 0.7450439453125, + "rewards/JointRewardFunction/std": 0.46799357831478117, + "step": 130, + "step_time": 28.36433180910135 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 661.5, + "completions/max_terminated_length": 661.5, + "completions/mean_length": 416.2625, + "completions/mean_terminated_length": 416.2625, + "completions/min_length": 235.6, + "completions/min_terminated_length": 235.6, + "entropy": 1.3542070075869561, + "epoch": 1.12, + "frac_reward_zero_std": 0.2, + "grad_norm": 2.328125, + "kl": 0.3334041152149439, + "learning_rate": 1.3125000000000001e-06, + "loss": 0.022475141286849975, + "num_tokens": 850343.0, + "reward": 0.67725830078125, + "reward_std": 0.3997616931796074, + "rewards/JointRewardFunction/mean": 0.67725830078125, + 
"rewards/JointRewardFunction/std": 0.399761700630188, + "step": 140, + "step_time": 28.77159399340053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 703.9, + "completions/max_terminated_length": 703.9, + "completions/mean_length": 422.925, + "completions/mean_terminated_length": 422.925, + "completions/min_length": 209.7, + "completions/min_terminated_length": 209.7, + "entropy": 1.2913881182670592, + "epoch": 1.2, + "frac_reward_zero_std": 0.2, + "grad_norm": 3.734375, + "kl": 0.31345505844801663, + "learning_rate": 6.875000000000001e-07, + "loss": 0.029654264450073242, + "num_tokens": 904753.0, + "reward": 0.79158935546875, + "reward_std": 0.4540836468338966, + "rewards/JointRewardFunction/mean": 0.79158935546875, + "rewards/JointRewardFunction/std": 0.45408365726470945, + "step": 150, + "step_time": 30.629617839398996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.8, + "completions/max_terminated_length": 645.8, + "completions/mean_length": 411.0125, + "completions/mean_terminated_length": 411.0125, + "completions/min_length": 207.4, + "completions/min_terminated_length": 207.4, + "entropy": 1.2935365058481694, + "epoch": 1.28, + "frac_reward_zero_std": 0.175, + "grad_norm": 3.171875, + "kl": 0.3413598489947617, + "learning_rate": 6.250000000000001e-08, + "loss": -0.008248078823089599, + "num_tokens": 958536.0, + "reward": 0.695233154296875, + "reward_std": 0.40624782145023347, + "rewards/JointRewardFunction/mean": 0.695233154296875, + "rewards/JointRewardFunction/std": 0.40624783337116244, + "step": 160, + "step_time": 28.084300646101475 + } + ], + "logging_steps": 10, + "max_steps": 160, + 
"num_input_tokens_seen": 958536, + "num_train_epochs": 2, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/last-checkpoint/training_args.bin b/last-checkpoint/training_args.bin new file mode 100644 index 0000000..3df8356 --- /dev/null +++ b/last-checkpoint/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2981ff6e4b5c1859a58933c6d902a0dc25387394cd8471dca3c2469f88f7cd0e +size 7249 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..b691159 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2a6ee09b8a2b9681cd30c70233f3c0ffd05af9b38f65aad3f92a2b5999f904 +size 6171927112 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + 
"pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..3df8356 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2981ff6e4b5c1859a58933c6d902a0dc25387394cd8471dca3c2469f88f7cd0e +size 7249