commit 371929f9773b9a251d1e71bb4dc9a1660bbce5ba Author: ModelHub XC Date: Wed May 6 12:32:44 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: arnav-yadav/jailbreak-attacker-l1 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..68c451c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,39 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-144/tokenizer.json filter=lfs 
diff=lfs merge=lfs -text +checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..f204d56 --- /dev/null +++ b/README.md @@ -0,0 +1,70 @@ +--- +base_model: unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit +library_name: transformers +model_name: jailbreak-attacker-l1 +tags: +- generated_from_trainer +- trl +- grpo +- hf_jobs +- unsloth +licence: license +--- + +# Model Card for jailbreak-attacker-l1 + +This model is a fine-tuned version of [unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit](https://huggingface.co/unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="arnav-yadav/jailbreak-attacker-l1", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/2024eb02510-/jailbreak-arena/runs/tib83q77) + + +This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300). + +### Framework versions + +- TRL: 0.24.0 +- Transformers: 5.5.0 +- Pytorch: 2.10.0 +- Datasets: 4.3.0 +- Tokenizers: 0.22.2 + +## Citations + +Cite GRPO as: + +```bibtex +@article{shao2024deepseekmath, + title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}}, + author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. 
Wu and Daya Guo}, + year = 2024, + eprint = {arXiv:2402.03300}, +} + +``` + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000..bf064af --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information 
Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000..7273498 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,49 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "Qwen2ForCausalLM", + "parent_library": "transformers.models.qwen2.modeling_qwen2", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + 
"qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000..8720a89 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3168fdc394e18642d2931cdb227a43e111b82a57ced84e101c2ed2000c295330 +size 17462432 diff --git a/checkpoint-100/chat_template.jinja b/checkpoint-100/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/checkpoint-100/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000..27efe39 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74ff341f2c8d4288d4277d7f740448d3487ed92977a29043c16a3ad41fb455e +size 9117451 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000..444f433 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5889fede59127771d8188cd8bb489c23824482903daf0eacc40d389989be3f6 +size 14645 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100644 index 0000000..215c5d2 --- /dev/null +++ 
b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4393a84a3109995aa1202073b039b12062e3189ed89aa0b94ef0510ba843009 +size 1383 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000..d044b4a --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1398ad4f0a069d2583c00c80c339afdb038366a30bb91e2ad8c6134f62d1bcb +size 1465 diff --git a/checkpoint-100/tokenizer.json b/checkpoint-100/tokenizer.json new file mode 100644 index 0000000..9fa63fd --- /dev/null +++ b/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5891a15588546db1ac7f2baf8fa94835a51a85c032c39793a55bb048b47446 +size 11422523 diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000..d3dac53 --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1,201 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": 
false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151657": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151658": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + 
"151660": { + "content": "<|fim_middle|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151665": { + "content": "<|PAD_TOKEN|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000..a4a8265 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,304 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0833333333333335, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.20833333333333334, + "frac_reward_zero_std": 0.275, + "grad_norm": 0.252999871969223, + "kl": 4.623506138159428e-05, + 
"learning_rate": 3e-05, + "loss": 7.562339305877686e-08, + "num_tokens": 30976.0, + "reward": -1.63874990940094, + "reward_std": 0.6745969772338867, + "rewards/reward_fn/mean": -1.63874990940094, + "rewards/reward_fn/std": 0.8239436686038971, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.4166666666666667, + "frac_reward_zero_std": 0.175, + "grad_norm": 0.2783929705619812, + "kl": 0.001219680597569095, + "learning_rate": 4.8449612403100775e-05, + "loss": 1.2345612049102782e-06, + "num_tokens": 62268.0, + "reward": -1.4524999380111694, + "reward_std": 0.7632413059473038, + "rewards/reward_fn/mean": -1.4524999260902405, + "rewards/reward_fn/std": 0.8776885092258453, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.625, + "frac_reward_zero_std": 0.075, + "grad_norm": 0.310812383890152, + "kl": 0.012675344076706096, + "learning_rate": 4.4573643410852715e-05, + "loss": 1.2670829892158509e-05, + "num_tokens": 93408.0, + "reward": -0.9787499606609344, + "reward_std": 0.8382474899291992, + "rewards/reward_fn/mean": -0.9787499666213989, + "rewards/reward_fn/std": 0.874516224861145, + "step": 30 + }, + 
{ + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.8333333333333334, + "frac_reward_zero_std": 0.075, + "grad_norm": 0.3383573889732361, + "kl": 0.03188293152488768, + "learning_rate": 4.0697674418604655e-05, + "loss": 3.1878240406513214e-05, + "num_tokens": 124444.0, + "reward": -0.7437499940395356, + "reward_std": 0.7179304540157319, + "rewards/reward_fn/mean": -0.7437499821186065, + "rewards/reward_fn/std": 0.7846309900283813, + "step": 40 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.0416666666666667, + "frac_reward_zero_std": 0.225, + "grad_norm": 0.35372963547706604, + "kl": 0.04432902028784156, + "learning_rate": 3.682170542635659e-05, + "loss": 4.432052373886109e-05, + "num_tokens": 155440.0, + "reward": -0.6174999922513962, + "reward_std": 0.5982875227928162, + "rewards/reward_fn/mean": -0.6174999952316285, + "rewards/reward_fn/std": 0.698276162147522, + "step": 50 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 
100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.25, + "frac_reward_zero_std": 0.475, + "grad_norm": 0.19890302419662476, + "kl": 0.06396519551053644, + "learning_rate": 3.294573643410852e-05, + "loss": 6.395205855369568e-05, + "num_tokens": 186420.0, + "reward": -0.4387499988079071, + "reward_std": 0.3215150013566017, + "rewards/reward_fn/mean": -0.4387499988079071, + "rewards/reward_fn/std": 0.48022162914276123, + "step": 60 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.4583333333333333, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.3041526675224304, + "kl": 0.08288784278556705, + "learning_rate": 2.9069767441860467e-05, + "loss": 8.288174867630005e-05, + "num_tokens": 217644.0, + "reward": -0.3737500041723251, + "reward_std": 0.2771410092711449, + "rewards/reward_fn/mean": -0.3737500011920929, + "rewards/reward_fn/std": 0.4215263336896896, + "step": 70 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.6666666666666665, + 
"frac_reward_zero_std": 0.725, + "grad_norm": 0.3231094479560852, + "kl": 0.10231811739504337, + "learning_rate": 2.5193798449612404e-05, + "loss": 0.00010231174528598785, + "num_tokens": 248780.0, + "reward": -0.34500000476837156, + "reward_std": 0.17121320217847824, + "rewards/reward_fn/mean": -0.34500000476837156, + "rewards/reward_fn/std": 0.26369245946407316, + "step": 80 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.875, + "frac_reward_zero_std": 0.675, + "grad_norm": 0.3435279428958893, + "kl": 0.123704750277102, + "learning_rate": 2.131782945736434e-05, + "loss": 0.0001236904412508011, + "num_tokens": 279836.0, + "reward": -0.3937500029802322, + "reward_std": 0.18749999403953552, + "rewards/reward_fn/mean": -0.3937500029802322, + "rewards/reward_fn/std": 0.29925094842910765, + "step": 90 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 2.0833333333333335, + "frac_reward_zero_std": 0.725, + "grad_norm": 0.0014862708048895001, + "kl": 0.1360227341763675, + "learning_rate": 1.744186046511628e-05, + "loss": 0.0001360199647024274, + "num_tokens": 310908.0, + "reward": -0.33250000327825546, + "reward_std": 
0.20633567869663239, + "rewards/reward_fn/mean": -0.3325000062584877, + "rewards/reward_fn/std": 0.3232262939214706, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 144, + "num_input_tokens_seen": 310908, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000..3fe73da --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33547e97f55cd0b1dbf9fd38741e33fd3a1cec2c5f66b2cc0a29389fd72e60df +size 6737 diff --git a/checkpoint-144/README.md b/checkpoint-144/README.md new file mode 100644 index 0000000..bf064af --- /dev/null +++ b/checkpoint-144/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct 
Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-144/adapter_config.json b/checkpoint-144/adapter_config.json new file mode 100644 index 0000000..7273498 --- /dev/null +++ b/checkpoint-144/adapter_config.json @@ -0,0 +1,49 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "Qwen2ForCausalLM", + "parent_library": "transformers.models.qwen2.modeling_qwen2", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + 
"qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-144/adapter_model.safetensors b/checkpoint-144/adapter_model.safetensors new file mode 100644 index 0000000..330b2ae --- /dev/null +++ b/checkpoint-144/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ba2542a3f5318d7b221361da85b8366cf34aea155a45b5a2cb7d0b324a8b085 +size 17462432 diff --git a/checkpoint-144/chat_template.jinja b/checkpoint-144/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/checkpoint-144/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/checkpoint-144/optimizer.pt b/checkpoint-144/optimizer.pt new file mode 100644 index 0000000..a9a78aa --- /dev/null +++ b/checkpoint-144/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e39383219cbf3376aa7fcd15f66e03e7ee502632fcf54a71655789a021435dd +size 9117451 diff --git a/checkpoint-144/rng_state.pth b/checkpoint-144/rng_state.pth new file mode 100644 index 0000000..9a96804 --- /dev/null +++ b/checkpoint-144/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6e8815fed576a90dd6491f2259b8b02e6e04b339371f28e466b4736de8eecf +size 14645 diff --git a/checkpoint-144/scaler.pt b/checkpoint-144/scaler.pt new file mode 100644 index 0000000..9100d4a --- /dev/null +++ 
b/checkpoint-144/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0069239ee428f0527072473131f7b7bb8f4123503b20bffc7561f20a6e3a9bd1 +size 1383 diff --git a/checkpoint-144/scheduler.pt b/checkpoint-144/scheduler.pt new file mode 100644 index 0000000..67a8a52 --- /dev/null +++ b/checkpoint-144/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afae42abd2a2ec7d9c744ce83d4950e515147c0e0b4e1a69b3a1da6f9181ce83 +size 1465 diff --git a/checkpoint-144/tokenizer.json b/checkpoint-144/tokenizer.json new file mode 100644 index 0000000..9fa63fd --- /dev/null +++ b/checkpoint-144/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5891a15588546db1ac7f2baf8fa94835a51a85c032c39793a55bb048b47446 +size 11422523 diff --git a/checkpoint-144/tokenizer_config.json b/checkpoint-144/tokenizer_config.json new file mode 100644 index 0000000..d3dac53 --- /dev/null +++ b/checkpoint-144/tokenizer_config.json @@ -0,0 +1,201 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": 
false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151657": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151658": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + 
"151660": { + "content": "<|fim_middle|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151665": { + "content": "<|PAD_TOKEN|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/checkpoint-144/trainer_state.json b/checkpoint-144/trainer_state.json new file mode 100644 index 0000000..a774ac5 --- /dev/null +++ b/checkpoint-144/trainer_state.json @@ -0,0 +1,412 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 144, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.20833333333333334, + "frac_reward_zero_std": 0.275, + "grad_norm": 0.252999871969223, + "kl": 4.623506138159428e-05, + "learning_rate": 
3e-05, + "loss": 7.562339305877686e-08, + "num_tokens": 30976.0, + "reward": -1.63874990940094, + "reward_std": 0.6745969772338867, + "rewards/reward_fn/mean": -1.63874990940094, + "rewards/reward_fn/std": 0.8239436686038971, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.4166666666666667, + "frac_reward_zero_std": 0.175, + "grad_norm": 0.2783929705619812, + "kl": 0.001219680597569095, + "learning_rate": 4.8449612403100775e-05, + "loss": 1.2345612049102782e-06, + "num_tokens": 62268.0, + "reward": -1.4524999380111694, + "reward_std": 0.7632413059473038, + "rewards/reward_fn/mean": -1.4524999260902405, + "rewards/reward_fn/std": 0.8776885092258453, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.625, + "frac_reward_zero_std": 0.075, + "grad_norm": 0.310812383890152, + "kl": 0.012675344076706096, + "learning_rate": 4.4573643410852715e-05, + "loss": 1.2670829892158509e-05, + "num_tokens": 93408.0, + "reward": -0.9787499606609344, + "reward_std": 0.8382474899291992, + "rewards/reward_fn/mean": -0.9787499666213989, + "rewards/reward_fn/std": 0.874516224861145, + "step": 30 + }, + { + 
"clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.8333333333333334, + "frac_reward_zero_std": 0.075, + "grad_norm": 0.3383573889732361, + "kl": 0.03188293152488768, + "learning_rate": 4.0697674418604655e-05, + "loss": 3.1878240406513214e-05, + "num_tokens": 124444.0, + "reward": -0.7437499940395356, + "reward_std": 0.7179304540157319, + "rewards/reward_fn/mean": -0.7437499821186065, + "rewards/reward_fn/std": 0.7846309900283813, + "step": 40 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.0416666666666667, + "frac_reward_zero_std": 0.225, + "grad_norm": 0.35372963547706604, + "kl": 0.04432902028784156, + "learning_rate": 3.682170542635659e-05, + "loss": 4.432052373886109e-05, + "num_tokens": 155440.0, + "reward": -0.6174999922513962, + "reward_std": 0.5982875227928162, + "rewards/reward_fn/mean": -0.6174999952316285, + "rewards/reward_fn/std": 0.698276162147522, + "step": 50 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 
100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.25, + "frac_reward_zero_std": 0.475, + "grad_norm": 0.19890302419662476, + "kl": 0.06396519551053644, + "learning_rate": 3.294573643410852e-05, + "loss": 6.395205855369568e-05, + "num_tokens": 186420.0, + "reward": -0.4387499988079071, + "reward_std": 0.3215150013566017, + "rewards/reward_fn/mean": -0.4387499988079071, + "rewards/reward_fn/std": 0.48022162914276123, + "step": 60 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.4583333333333333, + "frac_reward_zero_std": 0.5, + "grad_norm": 0.3041526675224304, + "kl": 0.08288784278556705, + "learning_rate": 2.9069767441860467e-05, + "loss": 8.288174867630005e-05, + "num_tokens": 217644.0, + "reward": -0.3737500041723251, + "reward_std": 0.2771410092711449, + "rewards/reward_fn/mean": -0.3737500011920929, + "rewards/reward_fn/std": 0.4215263336896896, + "step": 70 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.6666666666666665, + 
"frac_reward_zero_std": 0.725, + "grad_norm": 0.3231094479560852, + "kl": 0.10231811739504337, + "learning_rate": 2.5193798449612404e-05, + "loss": 0.00010231174528598785, + "num_tokens": 248780.0, + "reward": -0.34500000476837156, + "reward_std": 0.17121320217847824, + "rewards/reward_fn/mean": -0.34500000476837156, + "rewards/reward_fn/std": 0.26369245946407316, + "step": 80 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.875, + "frac_reward_zero_std": 0.675, + "grad_norm": 0.3435279428958893, + "kl": 0.123704750277102, + "learning_rate": 2.131782945736434e-05, + "loss": 0.0001236904412508011, + "num_tokens": 279836.0, + "reward": -0.3937500029802322, + "reward_std": 0.18749999403953552, + "rewards/reward_fn/mean": -0.3937500029802322, + "rewards/reward_fn/std": 0.29925094842910765, + "step": 90 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 2.0833333333333335, + "frac_reward_zero_std": 0.725, + "grad_norm": 0.0014862708048895001, + "kl": 0.1360227341763675, + "learning_rate": 1.744186046511628e-05, + "loss": 0.0001360199647024274, + "num_tokens": 310908.0, + "reward": -0.33250000327825546, + "reward_std": 
0.20633567869663239, + "rewards/reward_fn/mean": -0.3325000062584877, + "rewards/reward_fn/std": 0.3232262939214706, + "step": 100 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 2.2916666666666665, + "frac_reward_zero_std": 0.725, + "grad_norm": 0.2269871085882187, + "kl": 0.11381789557635784, + "learning_rate": 1.3565891472868217e-05, + "loss": 0.00011381134390830994, + "num_tokens": 341928.0, + "reward": -0.3806250035762787, + "reward_std": 0.16293191239237786, + "rewards/reward_fn/mean": -0.38062500655651094, + "rewards/reward_fn/std": 0.2841194272041321, + "step": 110 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 2.5, + "frac_reward_zero_std": 0.9, + "grad_norm": 0.22058050334453583, + "kl": 0.12228449042886495, + "learning_rate": 9.689922480620156e-06, + "loss": 0.0001222832128405571, + "num_tokens": 373136.0, + "reward": -0.318125008046627, + "reward_std": 0.06874999850988388, + "rewards/reward_fn/mean": -0.318125008046627, + "rewards/reward_fn/std": 0.1375, + "step": 120 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + 
"clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 2.7083333333333335, + "frac_reward_zero_std": 0.85, + "grad_norm": 0.003290389198809862, + "kl": 0.12402632609009742, + "learning_rate": 5.8139534883720935e-06, + "loss": 0.00012402099091559648, + "num_tokens": 404156.0, + "reward": -0.3487500101327896, + "reward_std": 0.09749999642372131, + "rewards/reward_fn/mean": -0.3487500071525574, + "rewards/reward_fn/std": 0.14059183597564698, + "step": 130 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 2.9166666666666665, + "frac_reward_zero_std": 0.75, + "grad_norm": 0.45249366760253906, + "kl": 0.12512001674622297, + "learning_rate": 1.937984496124031e-06, + "loss": 0.00012511350214481354, + "num_tokens": 435268.0, + "reward": -0.3462500095367432, + "reward_std": 0.1474999986588955, + "rewards/reward_fn/mean": -0.34625000655651095, + "rewards/reward_fn/std": 0.25199120342731474, + "step": 140 + } + ], + "logging_steps": 10, + "max_steps": 144, + "num_input_tokens_seen": 447744, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + 
"total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-144/training_args.bin b/checkpoint-144/training_args.bin new file mode 100644 index 0000000..3fe73da --- /dev/null +++ b/checkpoint-144/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33547e97f55cd0b1dbf9fd38741e33fd3a1cec2c5f66b2cc0a29389fd72e60df +size 6737 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000..bf064af --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,210 @@ +--- +base_model: unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit +- grpo +- lora +- transformers +- trl +- unsloth +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000..7273498 --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,49 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "Qwen2ForCausalLM", + "parent_library": "transformers.models.qwen2.modeling_qwen2", + "unsloth_fixed": true + }, + "base_model_name_or_path": "unsloth/qwen2.5-1.5b-instruct-unsloth-bnb-4bit", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.19.1", + 
"qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000..d3a5eee --- /dev/null +++ b/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980a21f13d6bfe1bb1b149ecaa25272d6a73a2260ce807abf776808be9a6c41b +size 17462432 diff --git a/checkpoint-50/chat_template.jinja b/checkpoint-50/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/checkpoint-50/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000..9fcf512 --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cc2251229f8f19a8e504cbce86506695a596124293dcd63a47c327aa26e714 +size 9117451 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000..2f6efda --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab1fef1a86e2af517848cf7bc2f7475817d1a618544100761f00d72fc2c9487 +size 14645 diff --git a/checkpoint-50/scaler.pt b/checkpoint-50/scaler.pt new file mode 100644 index 0000000..48ade11 --- /dev/null +++ 
b/checkpoint-50/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd0e9d505fbc3f97feb166d29026132bdf14eb3e5c7ff77beebc303ee666f96 +size 1383 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000..f64f154 --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5b47c60f7119e6edc874b9b7a04c081770ba90c5c8e647d47bee9bb9c059e4 +size 1465 diff --git a/checkpoint-50/tokenizer.json b/checkpoint-50/tokenizer.json new file mode 100644 index 0000000..9fa63fd --- /dev/null +++ b/checkpoint-50/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5891a15588546db1ac7f2baf8fa94835a51a85c032c39793a55bb048b47446 +size 11422523 diff --git a/checkpoint-50/tokenizer_config.json b/checkpoint-50/tokenizer_config.json new file mode 100644 index 0000000..d3dac53 --- /dev/null +++ b/checkpoint-50/tokenizer_config.json @@ -0,0 +1,201 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151657": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151658": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151660": { + 
"content": "<|fim_middle|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151665": { + "content": "<|PAD_TOKEN|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + } +} diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000..cbf0852 --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,169 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0416666666666667, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.20833333333333334, + "frac_reward_zero_std": 0.275, + "grad_norm": 0.252999871969223, + "kl": 4.623506138159428e-05, + "learning_rate": 3e-05, 
+ "loss": 7.562339305877686e-08, + "num_tokens": 30976.0, + "reward": -1.63874990940094, + "reward_std": 0.6745969772338867, + "rewards/reward_fn/mean": -1.63874990940094, + "rewards/reward_fn/std": 0.8239436686038971, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.4166666666666667, + "frac_reward_zero_std": 0.175, + "grad_norm": 0.2783929705619812, + "kl": 0.001219680597569095, + "learning_rate": 4.8449612403100775e-05, + "loss": 1.2345612049102782e-06, + "num_tokens": 62268.0, + "reward": -1.4524999380111694, + "reward_std": 0.7632413059473038, + "rewards/reward_fn/mean": -1.4524999260902405, + "rewards/reward_fn/std": 0.8776885092258453, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.625, + "frac_reward_zero_std": 0.075, + "grad_norm": 0.310812383890152, + "kl": 0.012675344076706096, + "learning_rate": 4.4573643410852715e-05, + "loss": 1.2670829892158509e-05, + "num_tokens": 93408.0, + "reward": -0.9787499606609344, + "reward_std": 0.8382474899291992, + "rewards/reward_fn/mean": -0.9787499666213989, + "rewards/reward_fn/std": 0.874516224861145, + "step": 30 + }, + { + 
"clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.8333333333333334, + "frac_reward_zero_std": 0.075, + "grad_norm": 0.3383573889732361, + "kl": 0.03188293152488768, + "learning_rate": 4.0697674418604655e-05, + "loss": 3.1878240406513214e-05, + "num_tokens": 124444.0, + "reward": -0.7437499940395356, + "reward_std": 0.7179304540157319, + "rewards/reward_fn/mean": -0.7437499821186065, + "rewards/reward_fn/std": 0.7846309900283813, + "step": 40 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completion_length": 100.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 100.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 100.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 100.0, + "completions/min_terminated_length": 0.0, + "epoch": 1.0416666666666667, + "frac_reward_zero_std": 0.225, + "grad_norm": 0.35372963547706604, + "kl": 0.04432902028784156, + "learning_rate": 3.682170542635659e-05, + "loss": 4.432052373886109e-05, + "num_tokens": 155440.0, + "reward": -0.6174999922513962, + "reward_std": 0.5982875227928162, + "rewards/reward_fn/mean": -0.6174999952316285, + "rewards/reward_fn/std": 0.698276162147522, + "step": 50 + } + ], + "logging_steps": 10, + "max_steps": 144, + "num_input_tokens_seen": 155440, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + 
"should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000..3fe73da --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33547e97f55cd0b1dbf9fd38741e33fd3a1cec2c5f66b2cc0a29389fd72e60df +size 6737 diff --git a/config.json b/config.json new file mode 100644 index 0000000..c8aa5c7 --- /dev/null +++ b/config.json @@ -0,0 +1,62 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": null, + "torch_dtype": "float16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "pad_token_id": 151665, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "unsloth_version": "2026.4.8", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} 
\ No newline at end of file diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..20cf17a --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b65052ca2b298651a357ab726953d531801a48cb96723c871ca52bf5c3d2f456 +size 3087467144 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..9fa63fd --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5891a15588546db1ac7f2baf8fa94835a51a85c032c39793a55bb048b47446 +size 11422523 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..544df20 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,202 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "left", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "151657": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151658": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "151665": { + "content": "<|PAD_TOKEN|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + }, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n" +} \ No newline at end of file