From 39fcc471d51519ae6d04e0dae7f0bcda2b47477c Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 16 Jun 2026 07:12:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: harsha070/exp2-qwen-island-s42-lambda-0p45 Source: Original Platform --- .gitattributes | 36 ++ README.md | 67 ++++ chat_template.jinja | 54 +++ completions/completions_00010.parquet | 3 + completions/completions_00020.parquet | 3 + completions/completions_00030.parquet | 3 + completions/completions_00040.parquet | 3 + completions/completions_00050.parquet | 3 + completions/completions_00060.parquet | 3 + completions/completions_00070.parquet | 3 + completions/completions_00080.parquet | 3 + completions/completions_00090.parquet | 3 + completions/completions_00100.parquet | 3 + completions/completions_00110.parquet | 3 + completions/completions_00120.parquet | 3 + completions/completions_00130.parquet | 3 + completions/completions_00140.parquet | 3 + completions/completions_00150.parquet | 3 + completions/completions_00160.parquet | 3 + config.json | 69 ++++ generation_config.json | 13 + last-checkpoint/chat_template.jinja | 54 +++ last-checkpoint/config.json | 69 ++++ last-checkpoint/generation_config.json | 13 + last-checkpoint/model.safetensors | 3 + last-checkpoint/tokenizer.json | 3 + last-checkpoint/tokenizer_config.json | 30 ++ last-checkpoint/trainer_state.json | 482 +++++++++++++++++++++++++ last-checkpoint/training_args.bin | 3 + model.safetensors | 3 + tokenizer.json | 3 + tokenizer_config.json | 30 ++ training_args.bin | 3 + 33 files changed, 983 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 completions/completions_00010.parquet create mode 100644 completions/completions_00020.parquet create mode 100644 completions/completions_00030.parquet create mode 100644 completions/completions_00040.parquet create mode 100644 completions/completions_00050.parquet create mode 100644 completions/completions_00060.parquet create mode 100644 completions/completions_00070.parquet create mode 100644 completions/completions_00080.parquet create mode 100644 completions/completions_00090.parquet create mode 100644 completions/completions_00100.parquet create mode 100644 completions/completions_00110.parquet create mode 100644 completions/completions_00120.parquet create mode 100644 completions/completions_00130.parquet create mode 100644 completions/completions_00140.parquet create mode 100644 completions/completions_00150.parquet create mode 100644 completions/completions_00160.parquet create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 last-checkpoint/chat_template.jinja create mode 100644 last-checkpoint/config.json create mode 100644 last-checkpoint/generation_config.json create mode 100644 last-checkpoint/model.safetensors create mode 100644 last-checkpoint/tokenizer.json create mode 100644 last-checkpoint/tokenizer_config.json create mode 100644 last-checkpoint/trainer_state.json create mode 100644 last-checkpoint/training_args.bin create mode 100644 model.safetensors create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..9eacf13 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +--- +base_model: Qwen/Qwen2.5-3B-Instruct +library_name: transformers +model_name: exp2-qwen-island-s42-lambda-0p45 +tags: +- generated_from_trainer +- trl +- grpo +licence: license +--- + +# Model Card for exp2-qwen-island-s42-lambda-0p45 + +This model is a fine-tuned version of [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="harsha070/exp2-qwen-island-s42-lambda-0p45", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/models-self5933/obfuscation-early-warning/runs/yeg20xyd) + + + +This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300). + +### Framework versions + +- TRL: 1.3.0 +- Transformers: 5.7.0 +- Pytorch: 2.11.0 +- Datasets: 4.8.5 +- Tokenizers: 0.22.2 + +## Citations + +Cite GRPO as: + +```bibtex +@article{shao2024deepseekmath, + title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}}, + author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo}, + year = 2024, + eprint = {arXiv:2402.03300}, +} +``` + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/completions/completions_00010.parquet b/completions/completions_00010.parquet new file mode 100644 index 0000000..d03f59e --- /dev/null +++ b/completions/completions_00010.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e91e8631ca5c6726a1b03a5a3caf92454481d1e9aad5de83352011c3bfdd24 +size 22400 diff --git a/completions/completions_00020.parquet b/completions/completions_00020.parquet new file mode 100644 index 0000000..bf1cdc4 --- /dev/null +++ b/completions/completions_00020.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd47305e123ca1f88bf25dec0b906d6d802901fde7a42132a5397eaa052b58a +size 18796 diff --git a/completions/completions_00030.parquet b/completions/completions_00030.parquet new file mode 100644 index 0000000..5fdfc74 --- /dev/null +++ b/completions/completions_00030.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458f95dad8ff52051ff95899017ee15be64e4e40c8c19f439301ec5037bf4b0a +size 32319 diff --git a/completions/completions_00040.parquet b/completions/completions_00040.parquet new file mode 100644 index 0000000..d655d41 --- /dev/null +++ b/completions/completions_00040.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a3855279bd5bdbbf5949abf10aedd90f398345ad897baf9f6e84c2c9253293 +size 20334 diff --git a/completions/completions_00050.parquet b/completions/completions_00050.parquet new file mode 100644 index 0000000..6b9bbeb --- /dev/null +++ b/completions/completions_00050.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:629a79aca73afecb306892366740b28e0d8f88e986deceb305a30dbb4240a640 +size 25568 diff --git a/completions/completions_00060.parquet b/completions/completions_00060.parquet new file mode 100644 index 0000000..f0b0646 --- /dev/null +++ b/completions/completions_00060.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a7ca75c53f8b5b7f8b76f4d49a2f15e7aca82124ff892484be8630717fcd65 +size 24450 diff --git a/completions/completions_00070.parquet b/completions/completions_00070.parquet new file mode 100644 index 0000000..a5f2031 --- /dev/null +++ b/completions/completions_00070.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d297394ac565f7fd311b728788ec9c1b27433def675454031c06a97844104e7 +size 21272 diff --git a/completions/completions_00080.parquet b/completions/completions_00080.parquet new file mode 100644 index 0000000..4b1d2f0 --- /dev/null +++ b/completions/completions_00080.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996557868ea18ec7633b1d48226080762a2340d66212a6b67fe79f0580eb4d2e +size 21345 diff --git a/completions/completions_00090.parquet b/completions/completions_00090.parquet new file mode 100644 index 0000000..5ae463f --- /dev/null +++ b/completions/completions_00090.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44da506ee9c5ae1cb436b31f17b9281ba690ff1334fbeb5588a40008128f94cc +size 23882 diff --git a/completions/completions_00100.parquet b/completions/completions_00100.parquet new file mode 100644 index 0000000..b7954eb --- /dev/null +++ b/completions/completions_00100.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b06e867df640bc48f60572b0b66aa0dc42622b89f2e3eb46997dba5eebb275b +size 24262 diff --git a/completions/completions_00110.parquet b/completions/completions_00110.parquet new file mode 100644 index 0000000..13750ce --- /dev/null +++ b/completions/completions_00110.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea5b5034bcb507ce150e604a0d653a75b39faae287f8555141fb8f7563bf5f24 +size 23218 diff --git a/completions/completions_00120.parquet b/completions/completions_00120.parquet new file mode 100644 index 0000000..a13294b --- /dev/null +++ b/completions/completions_00120.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9ac528a68c832e11c5fafd279e3246c5c4939debfdd85695590ba89402aae6 +size 20241 diff --git a/completions/completions_00130.parquet b/completions/completions_00130.parquet new file mode 100644 index 0000000..1c815b0 --- /dev/null +++ b/completions/completions_00130.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:051d80e6d8e9e7c749411b0bbf2b785c4a90c56cf854e25fed912b128985505b +size 21915 diff --git a/completions/completions_00140.parquet b/completions/completions_00140.parquet new file mode 100644 index 0000000..daee4d9 --- /dev/null +++ b/completions/completions_00140.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424047c438601bbea0a529d167b9a37a8cdcec08b9f8476d02264bb9fbd21677 +size 18931 diff --git a/completions/completions_00150.parquet b/completions/completions_00150.parquet new file mode 100644 index 0000000..f1b2c78 --- /dev/null +++ b/completions/completions_00150.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c84255798b66a111fb779448cd37c12ccc28ba5f09025a5fe441068d3880929 +size 22378 diff --git a/completions/completions_00160.parquet b/completions/completions_00160.parquet new file mode 100644 index 0000000..ac14214 --- /dev/null +++ b/completions/completions_00160.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9784d76b2ce42a96583a0d6c4712f944bb9dd5be2a5904f1040bba13b637838a +size 23287 diff --git a/config.json b/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/chat_template.jinja b/last-checkpoint/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/last-checkpoint/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/last-checkpoint/config.json b/last-checkpoint/config.json new file mode 100644 index 0000000..64c100d --- /dev/null +++ b/last-checkpoint/config.json @@ -0,0 +1,69 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.7.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/last-checkpoint/generation_config.json b/last-checkpoint/generation_config.json new file mode 100644 index 0000000..aaf8639 --- /dev/null +++ b/last-checkpoint/generation_config.json @@ -0,0 +1,13 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.7.0" +} diff --git a/last-checkpoint/model.safetensors b/last-checkpoint/model.safetensors new file mode 100644 index 0000000..3b4c795 --- /dev/null +++ b/last-checkpoint/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49aef298d5ac0da244b9510e3897fc2feed1ca55b3ae1c237ce1faf27f872639 +size 6171927112 diff --git a/last-checkpoint/tokenizer.json b/last-checkpoint/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/last-checkpoint/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/last-checkpoint/tokenizer_config.json b/last-checkpoint/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/last-checkpoint/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/last-checkpoint/trainer_state.json b/last-checkpoint/trainer_state.json new file mode 100644 index 0000000..ad8ed43 --- /dev/null +++ b/last-checkpoint/trainer_state.json @@ -0,0 +1,482 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.28, + "eval_steps": 500, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05, + "completions/max_length": 892.4, + "completions/max_terminated_length": 819.9, + "completions/mean_length": 573.1125, + "completions/mean_terminated_length": 551.0821472167969, + "completions/min_length": 313.1, + "completions/min_terminated_length": 313.1, + "entropy": 0.7912669345736504, + "epoch": 0.08, + "frac_reward_zero_std": 0.35, + "grad_norm": 2.359375, + "kl": 0.04604918217446539, + "learning_rate": 9.4375e-06, + "loss": 0.021108362078666686, + "num_tokens": 66493.0, + "reward": 0.6003784224390983, + "reward_std": 0.5601114392280578, + "rewards/JointRewardFunction/mean": 0.6003784224390983, + "rewards/JointRewardFunction/std": 0.5601114451885223, + "step": 10, + "step_time": 37.44416529300106 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05, + "completions/max_length": 800.3, + "completions/max_terminated_length": 673.7, + "completions/mean_length": 473.2875, + "completions/mean_terminated_length": 445.1142974853516, + "completions/min_length": 249.0, + "completions/min_terminated_length": 249.0, + "entropy": 1.0140388168394565, + "epoch": 0.16, + "frac_reward_zero_std": 0.4, + "grad_norm": 3.75, + "kl": 0.25841885171830653, + "learning_rate": 8.8125e-06, + "loss": 0.03933271169662476, + "num_tokens": 125000.0, + "reward": 0.9494775831699371, + "reward_std": 0.4877780556678772, + "rewards/JointRewardFunction/mean": 0.9494775831699371, + "rewards/JointRewardFunction/std": 0.4877780944108963, + "step": 20, + "step_time": 35.173660528497564 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 951.5, + "completions/max_terminated_length": 813.4, + "completions/mean_length": 634.375, + "completions/mean_terminated_length": 576.2744140625, + "completions/min_length": 331.8, + "completions/min_terminated_length": 331.8, + "entropy": 1.309154535830021, + "epoch": 0.24, + "frac_reward_zero_std": 0.375, + "grad_norm": 1.859375, + "kl": 0.2194912993349135, + "learning_rate": 8.1875e-06, + "loss": 0.026669433712959288, + "num_tokens": 196194.0, + "reward": 1.0899658679962159, + "reward_std": 0.5111429423093796, + "rewards/JointRewardFunction/mean": 1.0899658679962159, + "rewards/JointRewardFunction/std": 0.5111429691314697, + "step": 30, + "step_time": 41.286807323301765 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.05, + "completions/max_length": 848.0, + "completions/max_terminated_length": 783.4, + "completions/mean_length": 569.7125, + "completions/mean_terminated_length": 549.1333435058593, + "completions/min_length": 369.3, + "completions/min_terminated_length": 369.3, + "entropy": 1.1440452575683593, + "epoch": 0.32, + "frac_reward_zero_std": 0.25, + "grad_norm": 2.671875, + "kl": 0.13953614169731737, + "learning_rate": 7.5625e-06, + "loss": 0.013277828693389893, + "num_tokens": 262345.0, + "reward": 0.8887524664402008, + "reward_std": 0.5226060330867768, + "rewards/JointRewardFunction/mean": 0.8887524664402008, + "rewards/JointRewardFunction/std": 0.5226060688495636, + "step": 40, + "step_time": 37.13389427080001 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 875.8, + "completions/max_terminated_length": 875.8, + "completions/mean_length": 560.675, + "completions/mean_terminated_length": 560.675, + "completions/min_length": 320.4, + "completions/min_terminated_length": 320.4, + "entropy": 1.1498947571963072, + "epoch": 0.4, + "frac_reward_zero_std": 0.15, + "grad_norm": 2.875, + "kl": 0.09845088529400528, + "learning_rate": 6.9375e-06, + "loss": 0.03778347373008728, + "num_tokens": 328029.0, + "reward": 0.8338769674301147, + "reward_std": 0.5376368969678879, + "rewards/JointRewardFunction/mean": 0.8338769674301147, + "rewards/JointRewardFunction/std": 0.5376369208097458, + "step": 50, + "step_time": 38.24463103190137 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 829.7, + "completions/max_terminated_length": 829.7, + "completions/mean_length": 531.85, + "completions/mean_terminated_length": 531.85, + "completions/min_length": 287.5, + "completions/min_terminated_length": 287.5, + "entropy": 1.1666041024029254, + "epoch": 0.48, + "frac_reward_zero_std": 0.075, + "grad_norm": 2.015625, + "kl": 0.0893499652389437, + "learning_rate": 6.3125e-06, + "loss": 0.008259650319814682, + "num_tokens": 390917.0, + "reward": 0.6566748261451721, + "reward_std": 0.5261385977268219, + "rewards/JointRewardFunction/mean": 0.6566748261451721, + "rewards/JointRewardFunction/std": 0.5261386096477508, + "step": 60, + "step_time": 36.22042608499832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 760.1, + "completions/max_terminated_length": 760.1, + "completions/mean_length": 505.25, + "completions/mean_terminated_length": 505.25, + "completions/min_length": 287.4, + "completions/min_terminated_length": 287.4, + "entropy": 1.1663248613476753, + "epoch": 0.56, + "frac_reward_zero_std": 0.2, + "grad_norm": 1.9296875, + "kl": 0.09666758836247027, + "learning_rate": 5.6875e-06, + "loss": 0.04623619616031647, + "num_tokens": 451793.0, + "reward": 0.8685888767242431, + "reward_std": 0.5478669673204422, + "rewards/JointRewardFunction/mean": 0.8685888767242431, + "rewards/JointRewardFunction/std": 0.5478669852018356, + "step": 70, + "step_time": 33.734915479502526 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 692.8, + "completions/max_terminated_length": 692.8, + "completions/mean_length": 482.775, + "completions/mean_terminated_length": 482.775, + "completions/min_length": 305.5, + "completions/min_terminated_length": 305.5, + "entropy": 1.1494709253311157, + "epoch": 0.64, + "frac_reward_zero_std": 0.125, + "grad_norm": 2.765625, + "kl": 0.11551248212344944, + "learning_rate": 5.0625e-06, + "loss": -0.016333292424678802, + "num_tokens": 511029.0, + "reward": 1.0234729290008544, + "reward_std": 0.49485546052455903, + "rewards/JointRewardFunction/mean": 1.0234729290008544, + "rewards/JointRewardFunction/std": 0.4948554873466492, + "step": 80, + "step_time": 31.04121985129932 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 633.5, + "completions/max_terminated_length": 633.5, + "completions/mean_length": 459.7625, + "completions/mean_terminated_length": 459.7625, + "completions/min_length": 280.1, + "completions/min_terminated_length": 280.1, + "entropy": 1.1554153360426427, + "epoch": 0.72, + "frac_reward_zero_std": 0.325, + "grad_norm": 1.78125, + "kl": 0.1084105208516121, + "learning_rate": 4.4375e-06, + "loss": -0.037684041261672976, + "num_tokens": 568430.0, + "reward": 1.0063842952251434, + "reward_std": 0.45139331221580503, + "rewards/JointRewardFunction/mean": 1.0063842952251434, + "rewards/JointRewardFunction/std": 0.45139334285631777, + "step": 90, + "step_time": 28.41928261620196 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 694.2, + "completions/max_terminated_length": 694.2, + "completions/mean_length": 483.575, + "completions/mean_terminated_length": 483.575, + "completions/min_length": 270.1, + "completions/min_terminated_length": 270.1, + "entropy": 1.1369776532053948, + "epoch": 0.8, + "frac_reward_zero_std": 0.4, + "grad_norm": 2.25, + "kl": 0.12164497645571828, + "learning_rate": 3.8125e-06, + "loss": -0.011769261211156845, + "num_tokens": 627782.0, + "reward": 1.045616489648819, + "reward_std": 0.48985774293541906, + "rewards/JointRewardFunction/mean": 1.045616489648819, + "rewards/JointRewardFunction/std": 0.48985776007175447, + "step": 100, + "step_time": 30.974922098598473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 711.8, + "completions/max_terminated_length": 711.8, + "completions/mean_length": 478.5875, + "completions/mean_terminated_length": 478.5875, + "completions/min_length": 269.1, + "completions/min_terminated_length": 269.1, + "entropy": 1.1295112110674381, + "epoch": 0.88, + "frac_reward_zero_std": 0.275, + "grad_norm": 1.9765625, + "kl": 0.11388859800063074, + "learning_rate": 3.1875e-06, + "loss": 0.00085725337266922, + "num_tokens": 686613.0, + "reward": 0.9904785633087159, + "reward_std": 0.5183025985956192, + "rewards/JointRewardFunction/mean": 0.9904785633087159, + "rewards/JointRewardFunction/std": 0.5183026134967804, + "step": 110, + "step_time": 31.47189465950178 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 681.1, + "completions/max_terminated_length": 681.1, + "completions/mean_length": 474.575, + "completions/mean_terminated_length": 474.575, + "completions/min_length": 306.6, + "completions/min_terminated_length": 306.6, + "entropy": 1.0753874629735947, + "epoch": 0.96, + "frac_reward_zero_std": 0.225, + "grad_norm": 2.3125, + "kl": 0.11223098039627075, + "learning_rate": 2.5625e-06, + "loss": 0.02290368378162384, + "num_tokens": 744951.0, + "reward": 1.1014428853988647, + "reward_std": 0.43211724162101744, + "rewards/JointRewardFunction/mean": 1.1014428853988647, + "rewards/JointRewardFunction/std": 0.4321172535419464, + "step": 120, + "step_time": 30.187947676197656 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 700.8, + "completions/max_terminated_length": 700.8, + "completions/mean_length": 482.3, + "completions/mean_terminated_length": 482.3, + "completions/min_length": 292.0, + "completions/min_terminated_length": 292.0, + "entropy": 1.100510736554861, + "epoch": 1.04, + "frac_reward_zero_std": 0.175, + "grad_norm": 1.890625, + "kl": 0.11114813778549433, + "learning_rate": 1.9375e-06, + "loss": -0.00960662066936493, + "num_tokens": 804153.0, + "reward": 1.0441894710063935, + "reward_std": 0.4884881317615509, + "rewards/JointRewardFunction/mean": 1.0441894710063935, + "rewards/JointRewardFunction/std": 0.48848815858364103, + "step": 130, + "step_time": 31.431871901799603 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 654.8, + "completions/max_terminated_length": 654.8, + "completions/mean_length": 455.1875, + "completions/mean_terminated_length": 455.1875, + "completions/min_length": 274.1, + "completions/min_terminated_length": 274.1, + "entropy": 1.1068335216492415, + "epoch": 1.12, + "frac_reward_zero_std": 0.35, + "grad_norm": 2.296875, + "kl": 0.11968475547619165, + "learning_rate": 1.3125000000000001e-06, + "loss": -0.020959584414958952, + "num_tokens": 861216.0, + "reward": 1.0450488328933716, + "reward_std": 0.4309545159339905, + "rewards/JointRewardFunction/mean": 1.0450488328933716, + "rewards/JointRewardFunction/std": 0.43095452189445493, + "step": 140, + "step_time": 29.483576541099684 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 677.2, + "completions/max_terminated_length": 677.2, + "completions/mean_length": 466.3125, + "completions/mean_terminated_length": 466.3125, + "completions/min_length": 296.8, + "completions/min_terminated_length": 296.8, + "entropy": 1.0722076326608658, + "epoch": 1.2, + "frac_reward_zero_std": 0.45, + "grad_norm": 0.85546875, + "kl": 0.1184447065461427, + "learning_rate": 6.875000000000001e-07, + "loss": 0.02653493583202362, + "num_tokens": 919097.0, + "reward": 1.2055200517177582, + "reward_std": 0.3585283608641475, + "rewards/JointRewardFunction/mean": 1.2055200517177582, + "rewards/JointRewardFunction/std": 0.35852835562545804, + "step": 150, + "step_time": 30.24508252329979 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 672.0, + "completions/max_terminated_length": 672.0, + "completions/mean_length": 470.9375, + "completions/mean_terminated_length": 470.9375, + "completions/min_length": 331.2, + "completions/min_terminated_length": 331.2, + "entropy": 1.0674828842282296, + "epoch": 1.28, + "frac_reward_zero_std": 0.425, + "grad_norm": 1.875, + "kl": 0.12561513194814325, + "learning_rate": 6.250000000000001e-08, + "loss": -0.007510318607091904, + "num_tokens": 977674.0, + "reward": 1.1753845453262328, + "reward_std": 0.39318075180053713, + "rewards/JointRewardFunction/mean": 1.1753845453262328, + "rewards/JointRewardFunction/std": 0.3931807607412338, + "step": 160, + "step_time": 29.98714039499864 + } + ], + "logging_steps": 10, + "max_steps": 160, + "num_input_tokens_seen": 977674, + "num_train_epochs": 2, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/last-checkpoint/training_args.bin b/last-checkpoint/training_args.bin new file mode 100644 index 0000000..e74cc58 --- /dev/null +++ b/last-checkpoint/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3906488e422259322b7d06064404a5ecab931aab5f19b82f239e2f3da07e7f99 +size 7249 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..3b4c795 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49aef298d5ac0da244b9510e3897fc2feed1ca55b3ae1c237ce1faf27f872639 +size 6171927112 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..34510ff --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..770e41d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "local_files_only": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..e74cc58 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3906488e422259322b7d06064404a5ecab931aab5f19b82f239e2f3da07e7f99 +size 7249