commit 0ebbfc36ad43fde2e5a6cc11635bcc43cb825eec Author: ModelHub XC Date: Fri Apr 10 19:56:58 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: kangdawei/DAPO-8B Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b19d564 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git 
a/README.md b/README.md new file mode 100644 index 0000000..2538974 --- /dev/null +++ b/README.md @@ -0,0 +1,70 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B +datasets: knoveleng/open-rs +library_name: transformers +model_name: DAPO-8B +tags: +- generated_from_trainer +- open-r1 +- dapo +- trl +licence: license +--- + +# Model Card for DAPO-8B + +This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset. +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="kangdawei/DAPO-8B", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + + + + +This model was trained with DAPO, a method introduced in [DAPO: An Open-Source LLM Reinforcement Learning System at Scale](https://huggingface.co/papers/2503.14476). 
+ +### Framework versions + +- TRL: 0.16.0.dev0 +- Transformers: 4.57.1 +- Pytorch: 2.5.1 +- Datasets: 3.2.0 +- Tokenizers: 0.22.1 + +## Citations + +Cite DAPO as: + +```bibtex +@article{yu2025dapo, + title = {{DAPO: An Open-Source LLM Reinforcement Learning System at Scale}}, + author = {Qiying Yu and Zheng Zhang and others}, + year = 2025, + eprint = {arXiv:2503.14476}, +} + +``` + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/adapter/README.md b/adapter/README.md new file mode 100644 index 0000000..da11736 --- /dev/null +++ b/adapter/README.md @@ -0,0 +1,209 @@ +--- +base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:deepseek-ai/DeepSeek-R1-Distill-Llama-8B +- dapo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### 
Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/adapter/adapter_config.json b/adapter/adapter_config.json new file mode 100644 index 0000000..87dc5e1 --- /dev/null +++ b/adapter/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "v_proj", + "down_proj", + "gate_proj", + "up_proj", + 
"o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter/adapter_model.safetensors b/adapter/adapter_model.safetensors new file mode 100644 index 0000000..6cab77b --- /dev/null +++ b/adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5057e58d9c86f27f25bff17f93ec31710991515dde331e78b702be3b250793 +size 335605144 diff --git a/adapter/chat_template.jinja b/adapter/chat_template.jinja new file mode 100644 index 0000000..c2066bd --- /dev/null +++ b/adapter/chat_template.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool 
%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %} \ No newline at end of file diff --git a/adapter/special_tokens_map.json b/adapter/special_tokens_map.json new file mode 100644 index 0000000..1d385d6 --- /dev/null +++ b/adapter/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/adapter/tokenizer.json b/adapter/tokenizer.json new file mode 100644 index 0000000..e77a163 --- /dev/null +++ b/adapter/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c881aaf4ef935b1516ec79ad6405dd2a459f2b5d431a8a4a1399c92f3ba022 +size 17209711 diff --git a/adapter/tokenizer_config.json b/adapter/tokenizer_config.json new file mode 100644 index 0000000..dd34db6 --- /dev/null +++ b/adapter/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + 
"add_prefix_space": null, + "added_tokens_decoder": { + "128000": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128012": { + "content": "<|Assistant|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128015": { + "content": "<|▁pad▁|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": 
"<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": 
"<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": 
"<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/adapter/training_args.bin b/adapter/training_args.bin new file mode 100644 index 0000000..7c12e17 --- /dev/null +++ b/adapter/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f226307e55b90254d38ea4c353a35911a575a170c592310101fe03a927c9dc78 +size 8760 diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000..87dc5e1 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + 
"alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "v_proj", + "down_proj", + "gate_proj", + "up_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000..ba3b119 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509 +size 40 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..db68460 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "total_flos": 0.0, + "train_loss": 0.009447227440541611, + "train_runtime": 101500.2967, + "train_samples": 7000, + "train_samples_per_second": 0.095, + "train_steps_per_second": 0.002 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..c2066bd --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = 
namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %} \ No newline at end of file diff --git
a/config.json b/config.json new file mode 100644 index 0000000..3a7db4d --- /dev/null +++ b/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "dtype": "bfloat16", + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "transformers_version": "4.57.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/dynamic_sampling_log.txt b/dynamic_sampling_log.txt new file mode 100644 index 0000000..b7ea4ea --- /dev/null +++ b/dynamic_sampling_log.txt @@ -0,0 +1,1685 @@ +Dynamic Sampling Statistics Log +Started at: 2025-12-11 14:28:49 +================================================================================ +Step Attempts Total Prompts Valid Prompts Efficiency +================================================================================ +0 3 18 6 33.3 % +0 1 6 6 100.0 % +0 1 6 6 100.0 % +0 4 24 6 25.0 % +0 4 24 6 25.0 % +0 3 18 6 33.3 % +0 3 18 6 33.3 % +0 6 36 6 16.7 % +1 4 24 6 25.0 % +1 2 12 6 50.0 % +1 6 36 6 16.7 % +1 3 18 6 33.3 % +1 4 24 6 25.0 % +1 4 24 6 25.0 % +1 3 18 6 33.3 % +1 2 12 6 50.0 % +2 5 30 6 20.0 % +2 4 24 6 25.0 % +2 2 12 6 50.0 % +2 3 18 6 33.3 % +2 5 30 6 20.0 % +2 1 6 6 100.0 % +2 4 24 6 25.0 % +2 3 18 6 33.3 % +3 5 30 6 20.0 % +3 8 48 6 12.5 % +3 3 18 6 33.3 % +3 3 18 6 33.3 % +3 5 30 6 20.0 % +3 2 12 6 50.0 % +3 3 18 6 33.3 % +3 3 18 6 33.3 % +4 4 24 6 25.0 % +4 4 24 6 25.0 % +4 4 24 6 25.0 % +4 
6 36 6 16.7 % +4 3 18 6 33.3 % +4 1 6 6 100.0 % +4 3 18 6 33.3 % +4 6 36 6 16.7 % +5 4 24 6 25.0 % +5 2 12 6 50.0 % +5 3 18 6 33.3 % +5 2 12 6 50.0 % +5 2 12 6 50.0 % +5 2 12 6 50.0 % +5 3 18 6 33.3 % +5 4 24 6 25.0 % +6 2 12 6 50.0 % +6 6 36 6 16.7 % +6 4 24 6 25.0 % +6 3 18 6 33.3 % +6 3 18 6 33.3 % +6 3 18 6 33.3 % +6 4 24 6 25.0 % +6 2 12 6 50.0 % +7 4 24 6 25.0 % +7 4 24 6 25.0 % +7 4 24 6 25.0 % +7 7 42 6 14.3 % +7 4 24 6 25.0 % +7 4 24 6 25.0 % +7 2 12 6 50.0 % +7 3 18 6 33.3 % +8 6 36 6 16.7 % +8 3 18 6 33.3 % +8 2 12 6 50.0 % +8 1 6 6 100.0 % +8 1 6 6 100.0 % +8 6 36 6 16.7 % +8 1 6 6 100.0 % +8 3 18 6 33.3 % +9 5 30 6 20.0 % +9 4 24 6 25.0 % +9 4 24 6 25.0 % +9 1 6 6 100.0 % +9 1 6 6 100.0 % +9 1 6 6 100.0 % +9 4 24 6 25.0 % +9 1 6 6 100.0 % +10 4 24 6 25.0 % +10 2 12 6 50.0 % +10 5 30 6 20.0 % +10 7 42 6 14.3 % +10 4 24 6 25.0 % +10 13 78 6 7.7 % +10 5 30 6 20.0 % +10 5 30 6 20.0 % +11 6 36 6 16.7 % +11 4 24 6 25.0 % +11 4 24 6 25.0 % +11 1 6 6 100.0 % +11 10 60 6 10.0 % +11 4 24 6 25.0 % +11 6 36 6 16.7 % +11 5 30 6 20.0 % +12 2 12 6 50.0 % +12 2 12 6 50.0 % +12 10 60 6 10.0 % +12 4 24 6 25.0 % +12 1 6 6 100.0 % +12 3 18 6 33.3 % +12 3 18 6 33.3 % +12 2 12 6 50.0 % +13 5 30 6 20.0 % +13 2 12 6 50.0 % +13 12 72 6 8.3 % +13 1 6 6 100.0 % +13 4 24 6 25.0 % +13 5 30 6 20.0 % +13 2 12 6 50.0 % +13 2 12 6 50.0 % +14 4 24 6 25.0 % +14 1 6 6 100.0 % +14 4 24 6 25.0 % +14 1 6 6 100.0 % +14 3 18 6 33.3 % +14 3 18 6 33.3 % +14 2 12 6 50.0 % +14 3 18 6 33.3 % +15 2 12 6 50.0 % +15 3 18 6 33.3 % +15 6 36 6 16.7 % +15 1 6 6 100.0 % +15 5 30 6 20.0 % +15 1 6 6 100.0 % +15 2 12 6 50.0 % +15 4 24 6 25.0 % +16 4 24 6 25.0 % +16 1 6 6 100.0 % +16 3 18 6 33.3 % +16 10 60 6 10.0 % +16 1 6 6 100.0 % +16 6 36 6 16.7 % +16 1 6 6 100.0 % +16 4 24 6 25.0 % +17 3 18 6 33.3 % +17 4 24 6 25.0 % +17 3 18 6 33.3 % +17 2 12 6 50.0 % +17 2 12 6 50.0 % +17 2 12 6 50.0 % +17 1 6 6 100.0 % +17 3 18 6 33.3 % +18 3 18 6 33.3 % +18 3 18 6 33.3 % +18 2 12 6 50.0 % +18 10 60 6 10.0 % +18 3 18 
6 33.3 % +18 9 54 6 11.1 % +18 4 24 6 25.0 % +18 5 30 6 20.0 % +19 7 42 6 14.3 % +19 6 36 6 16.7 % +19 1 6 6 100.0 % +19 6 36 6 16.7 % +19 1 6 6 100.0 % +19 3 18 6 33.3 % +19 4 24 6 25.0 % +19 5 30 6 20.0 % +20 3 18 6 33.3 % +20 6 36 6 16.7 % +20 2 12 6 50.0 % +20 6 36 6 16.7 % +20 1 6 6 100.0 % +20 8 48 6 12.5 % +20 9 54 6 11.1 % +20 5 30 6 20.0 % +21 3 18 6 33.3 % +21 5 30 6 20.0 % +21 6 36 6 16.7 % +21 2 12 6 50.0 % +21 6 36 6 16.7 % +21 4 24 6 25.0 % +21 5 30 6 20.0 % +21 3 18 6 33.3 % +22 4 24 6 25.0 % +22 3 18 6 33.3 % +22 4 24 6 25.0 % +22 2 12 6 50.0 % +22 3 18 6 33.3 % +22 6 36 6 16.7 % +22 3 18 6 33.3 % +22 3 18 6 33.3 % +23 8 48 6 12.5 % +23 3 18 6 33.3 % +23 4 24 6 25.0 % +23 1 6 6 100.0 % +23 3 18 6 33.3 % +23 2 12 6 50.0 % +23 2 12 6 50.0 % +23 2 12 6 50.0 % +24 10 60 6 10.0 % +24 6 36 6 16.7 % +24 4 24 6 25.0 % +24 6 36 6 16.7 % +24 15 90 6 6.7 % +24 5 30 6 20.0 % +24 3 18 6 33.3 % +24 4 24 6 25.0 % +25 2 12 6 50.0 % +25 1 6 6 100.0 % +25 4 24 6 25.0 % +25 4 24 6 25.0 % +25 1 6 6 100.0 % +25 8 48 6 12.5 % +25 4 24 6 25.0 % +25 5 30 6 20.0 % +26 4 24 6 25.0 % +26 4 24 6 25.0 % +26 3 18 6 33.3 % +26 6 36 6 16.7 % +26 6 36 6 16.7 % +26 3 18 6 33.3 % +26 9 54 6 11.1 % +26 5 30 6 20.0 % +27 6 36 6 16.7 % +27 2 12 6 50.0 % +27 1 6 6 100.0 % +27 3 18 6 33.3 % +27 4 24 6 25.0 % +27 6 36 6 16.7 % +27 5 30 6 20.0 % +27 7 42 6 14.3 % +28 1 6 6 100.0 % +28 5 30 6 20.0 % +28 7 42 6 14.3 % +28 1 6 6 100.0 % +28 4 24 6 25.0 % +28 7 42 6 14.3 % +28 7 42 6 14.3 % +28 1 6 6 100.0 % +29 1 6 6 100.0 % +29 3 18 6 33.3 % +29 11 66 6 9.1 % +29 6 36 6 16.7 % +29 2 12 6 50.0 % +29 2 12 6 50.0 % +29 1 6 6 100.0 % +29 15 90 6 6.7 % +30 2 12 6 50.0 % +30 2 12 6 50.0 % +30 2 12 6 50.0 % +30 1 6 6 100.0 % +30 7 42 6 14.3 % +30 4 24 6 25.0 % +30 1 6 6 100.0 % +30 6 36 6 16.7 % +31 6 36 6 16.7 % +31 2 12 6 50.0 % +31 2 12 6 50.0 % +31 9 54 6 11.1 % +31 3 18 6 33.3 % +31 3 18 6 33.3 % +31 2 12 6 50.0 % +31 4 24 6 25.0 % +32 1 6 6 100.0 % +32 6 36 6 16.7 % +32 3 18 6 33.3 % +32 3 18 
6 33.3 % +32 11 66 6 9.1 % +32 2 12 6 50.0 % +32 6 36 6 16.7 % +32 4 24 6 25.0 % +33 3 18 6 33.3 % +33 3 18 6 33.3 % +33 2 12 6 50.0 % +33 4 24 6 25.0 % +33 6 36 6 16.7 % +33 1 6 6 100.0 % +33 3 18 6 33.3 % +33 2 12 6 50.0 % +34 3 18 6 33.3 % +34 6 36 6 16.7 % +34 7 42 6 14.3 % +34 4 24 6 25.0 % +34 4 24 6 25.0 % +34 2 12 6 50.0 % +34 2 12 6 50.0 % +34 2 12 6 50.0 % +35 4 24 6 25.0 % +35 3 18 6 33.3 % +35 3 18 6 33.3 % +35 6 36 6 16.7 % +35 8 48 6 12.5 % +35 4 24 6 25.0 % +35 7 42 6 14.3 % +35 5 30 6 20.0 % +36 5 30 6 20.0 % +36 2 12 6 50.0 % +36 3 18 6 33.3 % +36 4 24 6 25.0 % +36 5 30 6 20.0 % +36 3 18 6 33.3 % +36 2 12 6 50.0 % +36 2 12 6 50.0 % +37 5 30 6 20.0 % +37 12 72 6 8.3 % +37 4 24 6 25.0 % +37 2 12 6 50.0 % +37 5 30 6 20.0 % +37 10 60 6 10.0 % +37 2 12 6 50.0 % +37 5 30 6 20.0 % +38 3 18 6 33.3 % +38 5 30 6 20.0 % +38 6 36 6 16.7 % +38 7 42 6 14.3 % +38 4 24 6 25.0 % +38 3 18 6 33.3 % +38 3 18 6 33.3 % +38 3 18 6 33.3 % +39 3 18 6 33.3 % +39 2 12 6 50.0 % +39 4 24 6 25.0 % +39 9 54 6 11.1 % +39 2 12 6 50.0 % +39 3 18 6 33.3 % +39 9 54 6 11.1 % +39 4 24 6 25.0 % +40 2 12 6 50.0 % +40 10 60 6 10.0 % +40 3 18 6 33.3 % +40 5 30 6 20.0 % +40 3 18 6 33.3 % +40 3 18 6 33.3 % +40 4 24 6 25.0 % +40 3 18 6 33.3 % +41 3 18 6 33.3 % +41 4 24 6 25.0 % +41 4 24 6 25.0 % +41 4 24 6 25.0 % +41 4 24 6 25.0 % +41 1 6 6 100.0 % +41 6 36 6 16.7 % +41 3 18 6 33.3 % +42 3 18 6 33.3 % +42 2 12 6 50.0 % +42 5 30 6 20.0 % +42 4 24 6 25.0 % +42 2 12 6 50.0 % +42 4 24 6 25.0 % +42 6 36 6 16.7 % +42 5 30 6 20.0 % +43 8 48 6 12.5 % +43 1 6 6 100.0 % +43 7 42 6 14.3 % +43 8 48 6 12.5 % +43 6 36 6 16.7 % +43 1 6 6 100.0 % +43 3 18 6 33.3 % +43 2 12 6 50.0 % +44 8 48 6 12.5 % +44 2 12 6 50.0 % +44 2 12 6 50.0 % +44 2 12 6 50.0 % +44 4 24 6 25.0 % +44 5 30 6 20.0 % +44 3 18 6 33.3 % +44 4 24 6 25.0 % +45 5 30 6 20.0 % +45 4 24 6 25.0 % +45 1 6 6 100.0 % +45 3 18 6 33.3 % +45 9 54 6 11.1 % +45 4 24 6 25.0 % +45 3 18 6 33.3 % +45 2 12 6 50.0 % +46 5 30 6 20.0 % +46 4 24 6 25.0 % +46 4 24 
6 25.0 % +46 11 66 6 9.1 % +46 5 30 6 20.0 % +46 3 18 6 33.3 % +46 5 30 6 20.0 % +46 9 54 6 11.1 % +47 6 36 6 16.7 % +47 1 6 6 100.0 % +47 9 54 6 11.1 % +47 4 24 6 25.0 % +47 4 24 6 25.0 % +47 4 24 6 25.0 % +47 1 6 6 100.0 % +47 8 48 6 12.5 % +48 2 12 6 50.0 % +48 6 36 6 16.7 % +48 1 6 6 100.0 % +48 1 6 6 100.0 % +48 7 42 6 14.3 % +48 11 66 6 9.1 % +48 1 6 6 100.0 % +48 4 24 6 25.0 % +49 1 6 6 100.0 % +49 2 12 6 50.0 % +49 2 12 6 50.0 % +49 1 6 6 100.0 % +49 2 12 6 50.0 % +49 10 60 6 10.0 % +49 5 30 6 20.0 % +49 3 18 6 33.3 % +50 7 42 6 14.3 % +50 3 18 6 33.3 % +50 2 12 6 50.0 % +50 4 24 6 25.0 % +50 4 24 6 25.0 % +50 9 54 6 11.1 % +50 3 18 6 33.3 % +50 4 24 6 25.0 % +51 2 12 6 50.0 % +51 3 18 6 33.3 % +51 1 6 6 100.0 % +51 2 12 6 50.0 % +51 6 36 6 16.7 % +51 2 12 6 50.0 % +51 6 36 6 16.7 % +51 7 42 6 14.3 % +52 5 30 6 20.0 % +52 1 6 6 100.0 % +52 5 30 6 20.0 % +52 5 30 6 20.0 % +52 3 18 6 33.3 % +52 5 30 6 20.0 % +52 4 24 6 25.0 % +52 8 48 6 12.5 % +53 1 6 6 100.0 % +53 2 12 6 50.0 % +53 8 48 6 12.5 % +53 1 6 6 100.0 % +53 2 12 6 50.0 % +53 5 30 6 20.0 % +53 5 30 6 20.0 % +53 1 6 6 100.0 % +54 8 48 6 12.5 % +54 2 12 6 50.0 % +54 1 6 6 100.0 % +54 3 18 6 33.3 % +54 4 24 6 25.0 % +54 3 18 6 33.3 % +54 2 12 6 50.0 % +54 3 18 6 33.3 % +55 4 24 6 25.0 % +55 9 54 6 11.1 % +55 3 18 6 33.3 % +55 3 18 6 33.3 % +55 3 18 6 33.3 % +55 6 36 6 16.7 % +55 1 6 6 100.0 % +55 4 24 6 25.0 % +56 4 24 6 25.0 % +56 2 12 6 50.0 % +56 1 6 6 100.0 % +56 3 18 6 33.3 % +56 3 18 6 33.3 % +56 3 18 6 33.3 % +56 4 24 6 25.0 % +56 3 18 6 33.3 % +57 2 12 6 50.0 % +57 1 6 6 100.0 % +57 7 42 6 14.3 % +57 3 18 6 33.3 % +57 4 24 6 25.0 % +57 9 54 6 11.1 % +57 7 42 6 14.3 % +57 1 6 6 100.0 % +58 8 48 6 12.5 % +58 2 12 6 50.0 % +58 3 18 6 33.3 % +58 7 42 6 14.3 % +58 2 12 6 50.0 % +58 5 30 6 20.0 % +58 9 54 6 11.1 % +58 3 18 6 33.3 % +59 3 18 6 33.3 % +59 3 18 6 33.3 % +59 2 12 6 50.0 % +59 1 6 6 100.0 % +59 7 42 6 14.3 % +59 1 6 6 100.0 % +59 3 18 6 33.3 % +59 1 6 6 100.0 % +60 3 18 6 33.3 % +60 2 12 
6 50.0 % +60 4 24 6 25.0 % +60 1 6 6 100.0 % +60 3 18 6 33.3 % +60 12 72 6 8.3 % +60 13 78 6 7.7 % +60 2 12 6 50.0 % +61 6 36 6 16.7 % +61 1 6 6 100.0 % +61 3 18 6 33.3 % +61 2 12 6 50.0 % +61 2 12 6 50.0 % +61 2 12 6 50.0 % +61 1 6 6 100.0 % +61 5 30 6 20.0 % +62 3 18 6 33.3 % +62 3 18 6 33.3 % +62 4 24 6 25.0 % +62 4 24 6 25.0 % +62 6 36 6 16.7 % +62 3 18 6 33.3 % +62 2 12 6 50.0 % +62 2 12 6 50.0 % +63 4 24 6 25.0 % +63 4 24 6 25.0 % +63 3 18 6 33.3 % +63 4 24 6 25.0 % +63 3 18 6 33.3 % +63 1 6 6 100.0 % +63 6 36 6 16.7 % +63 2 12 6 50.0 % +64 3 18 6 33.3 % +64 6 36 6 16.7 % +64 2 12 6 50.0 % +64 2 12 6 50.0 % +64 4 24 6 25.0 % +64 6 36 6 16.7 % +64 1 6 6 100.0 % +64 4 24 6 25.0 % +65 4 24 6 25.0 % +65 3 18 6 33.3 % +65 2 12 6 50.0 % +65 5 30 6 20.0 % +65 3 18 6 33.3 % +65 3 18 6 33.3 % +65 6 36 6 16.7 % +65 4 24 6 25.0 % +66 1 6 6 100.0 % +66 5 30 6 20.0 % +66 1 6 6 100.0 % +66 3 18 6 33.3 % +66 9 54 6 11.1 % +66 7 42 6 14.3 % +66 4 24 6 25.0 % +66 7 42 6 14.3 % +67 5 30 6 20.0 % +67 5 30 6 20.0 % +67 1 6 6 100.0 % +67 5 30 6 20.0 % +67 9 54 6 11.1 % +67 3 18 6 33.3 % +67 1 6 6 100.0 % +67 6 36 6 16.7 % +68 3 18 6 33.3 % +68 1 6 6 100.0 % +68 1 6 6 100.0 % +68 4 24 6 25.0 % +68 2 12 6 50.0 % +68 2 12 6 50.0 % +68 2 12 6 50.0 % +68 2 12 6 50.0 % +69 2 12 6 50.0 % +69 3 18 6 33.3 % +69 5 30 6 20.0 % +69 7 42 6 14.3 % +69 1 6 6 100.0 % +69 1 6 6 100.0 % +69 2 12 6 50.0 % +69 3 18 6 33.3 % +70 5 30 6 20.0 % +70 3 18 6 33.3 % +70 2 12 6 50.0 % +70 2 12 6 50.0 % +70 6 36 6 16.7 % +70 4 24 6 25.0 % +70 3 18 6 33.3 % +70 4 24 6 25.0 % +71 2 12 6 50.0 % +71 1 6 6 100.0 % +71 4 24 6 25.0 % +71 3 18 6 33.3 % +71 10 60 6 10.0 % +71 2 12 6 50.0 % +71 2 12 6 50.0 % +71 2 12 6 50.0 % +72 9 54 6 11.1 % +72 4 24 6 25.0 % +72 1 6 6 100.0 % +72 4 24 6 25.0 % +72 2 12 6 50.0 % +72 2 12 6 50.0 % +72 4 24 6 25.0 % +72 6 36 6 16.7 % +73 2 12 6 50.0 % +73 6 36 6 16.7 % +73 3 18 6 33.3 % +73 3 18 6 33.3 % +73 1 6 6 100.0 % +73 5 30 6 20.0 % +73 1 6 6 100.0 % +73 4 24 6 25.0 % +74 4 24 
6 25.0 % +74 7 42 6 14.3 % +74 4 24 6 25.0 % +74 3 18 6 33.3 % +74 3 18 6 33.3 % +74 2 12 6 50.0 % +74 2 12 6 50.0 % +74 2 12 6 50.0 % +75 2 12 6 50.0 % +75 2 12 6 50.0 % +75 2 12 6 50.0 % +75 4 24 6 25.0 % +75 7 42 6 14.3 % +75 4 24 6 25.0 % +75 3 18 6 33.3 % +75 9 54 6 11.1 % +76 5 30 6 20.0 % +76 5 30 6 20.0 % +76 8 48 6 12.5 % +76 3 18 6 33.3 % +76 4 24 6 25.0 % +76 8 48 6 12.5 % +76 2 12 6 50.0 % +76 7 42 6 14.3 % +77 8 48 6 12.5 % +77 2 12 6 50.0 % +77 3 18 6 33.3 % +77 4 24 6 25.0 % +77 4 24 6 25.0 % +77 1 6 6 100.0 % +77 1 6 6 100.0 % +77 4 24 6 25.0 % +78 3 18 6 33.3 % +78 6 36 6 16.7 % +78 5 30 6 20.0 % +78 4 24 6 25.0 % +78 2 12 6 50.0 % +78 2 12 6 50.0 % +78 1 6 6 100.0 % +78 1 6 6 100.0 % +79 4 24 6 25.0 % +79 9 54 6 11.1 % +79 2 12 6 50.0 % +79 5 30 6 20.0 % +79 5 30 6 20.0 % +79 1 6 6 100.0 % +79 2 12 6 50.0 % +79 5 30 6 20.0 % +80 3 18 6 33.3 % +80 5 30 6 20.0 % +80 3 18 6 33.3 % +80 4 24 6 25.0 % +80 2 12 6 50.0 % +80 2 12 6 50.0 % +80 5 30 6 20.0 % +80 2 12 6 50.0 % +81 7 42 6 14.3 % +81 5 30 6 20.0 % +81 3 18 6 33.3 % +80 3 18 6 33.3 % +80 8 48 6 12.5 % +80 3 18 6 33.3 % +80 4 24 6 25.0 % +80 5 30 6 20.0 % +80 2 12 6 50.0 % +80 2 12 6 50.0 % +80 1 6 6 100.0 % +81 2 12 6 50.0 % +81 9 54 6 11.1 % +81 3 18 6 33.3 % +81 5 30 6 20.0 % +81 5 30 6 20.0 % +81 2 12 6 50.0 % +81 3 18 6 33.3 % +81 1 6 6 100.0 % +82 3 18 6 33.3 % +82 2 12 6 50.0 % +82 2 12 6 50.0 % +82 1 6 6 100.0 % +82 1 6 6 100.0 % +82 10 60 6 10.0 % +82 6 36 6 16.7 % +82 3 18 6 33.3 % +83 1 6 6 100.0 % +83 6 36 6 16.7 % +83 15 90 6 6.7 % +83 3 18 6 33.3 % +83 5 30 6 20.0 % +83 6 36 6 16.7 % +83 2 12 6 50.0 % +83 4 24 6 25.0 % +84 3 18 6 33.3 % +84 1 6 6 100.0 % +84 8 48 6 12.5 % +84 4 24 6 25.0 % +84 5 30 6 20.0 % +84 4 24 6 25.0 % +84 4 24 6 25.0 % +84 2 12 6 50.0 % +85 6 36 6 16.7 % +85 1 6 6 100.0 % +85 6 36 6 16.7 % +85 2 12 6 50.0 % +85 8 48 6 12.5 % +85 2 12 6 50.0 % +85 5 30 6 20.0 % +85 6 36 6 16.7 % +86 1 6 6 100.0 % +86 1 6 6 100.0 % +86 4 24 6 25.0 % +86 4 24 6 25.0 % +86 3 18 
6 33.3 % +86 3 18 6 33.3 % +86 12 72 6 8.3 % +86 2 12 6 50.0 % +87 8 48 6 12.5 % +87 3 18 6 33.3 % +87 4 24 6 25.0 % +87 1 6 6 100.0 % +87 9 54 6 11.1 % +87 5 30 6 20.0 % +87 2 12 6 50.0 % +87 3 18 6 33.3 % +88 7 42 6 14.3 % +88 4 24 6 25.0 % +88 8 48 6 12.5 % +88 1 6 6 100.0 % +88 2 12 6 50.0 % +88 12 72 6 8.3 % +88 5 30 6 20.0 % +88 2 12 6 50.0 % +89 3 18 6 33.3 % +89 7 42 6 14.3 % +89 4 24 6 25.0 % +89 1 6 6 100.0 % +89 3 18 6 33.3 % +89 4 24 6 25.0 % +89 5 30 6 20.0 % +89 2 12 6 50.0 % +90 2 12 6 50.0 % +90 3 18 6 33.3 % +90 8 48 6 12.5 % +90 3 18 6 33.3 % +90 1 6 6 100.0 % +90 1 6 6 100.0 % +90 2 12 6 50.0 % +90 3 18 6 33.3 % +91 1 6 6 100.0 % +91 6 36 6 16.7 % +91 4 24 6 25.0 % +91 2 12 6 50.0 % +91 11 66 6 9.1 % +91 2 12 6 50.0 % +91 7 42 6 14.3 % +91 5 30 6 20.0 % +92 4 24 6 25.0 % +92 3 18 6 33.3 % +92 3 18 6 33.3 % +92 6 36 6 16.7 % +92 1 6 6 100.0 % +92 5 30 6 20.0 % +92 3 18 6 33.3 % +92 6 36 6 16.7 % +93 5 30 6 20.0 % +93 4 24 6 25.0 % +93 2 12 6 50.0 % +93 4 24 6 25.0 % +93 3 18 6 33.3 % +93 2 12 6 50.0 % +93 5 30 6 20.0 % +93 1 6 6 100.0 % +94 5 30 6 20.0 % +94 6 36 6 16.7 % +94 7 42 6 14.3 % +94 2 12 6 50.0 % +94 3 18 6 33.3 % +94 6 36 6 16.7 % +94 6 36 6 16.7 % +94 4 24 6 25.0 % +95 1 6 6 100.0 % +95 2 12 6 50.0 % +95 3 18 6 33.3 % +95 4 24 6 25.0 % +95 3 18 6 33.3 % +95 4 24 6 25.0 % +95 3 18 6 33.3 % +95 2 12 6 50.0 % +96 4 24 6 25.0 % +96 2 12 6 50.0 % +96 5 30 6 20.0 % +96 6 36 6 16.7 % +96 1 6 6 100.0 % +96 3 18 6 33.3 % +96 2 12 6 50.0 % +96 9 54 6 11.1 % +97 6 36 6 16.7 % +97 2 12 6 50.0 % +97 5 30 6 20.0 % +97 7 42 6 14.3 % +97 1 6 6 100.0 % +97 2 12 6 50.0 % +97 8 48 6 12.5 % +97 7 42 6 14.3 % +98 3 18 6 33.3 % +98 3 18 6 33.3 % +98 3 18 6 33.3 % +98 3 18 6 33.3 % +98 2 12 6 50.0 % +98 4 24 6 25.0 % +98 1 6 6 100.0 % +98 8 48 6 12.5 % +99 2 12 6 50.0 % +99 3 18 6 33.3 % +99 11 66 6 9.1 % +99 4 24 6 25.0 % +99 8 48 6 12.5 % +99 3 18 6 33.3 % +99 2 12 6 50.0 % +99 3 18 6 33.3 % +100 2 12 6 50.0 % +100 3 18 6 33.3 % +100 3 18 6 33.3 % +100 3 
18 6 33.3 % +100 1 6 6 100.0 % +100 6 36 6 16.7 % +100 3 18 6 33.3 % +100 2 12 6 50.0 % +101 3 18 6 33.3 % +101 4 24 6 25.0 % +101 4 24 6 25.0 % +101 5 30 6 20.0 % +101 4 24 6 25.0 % +101 3 18 6 33.3 % +101 1 6 6 100.0 % +101 2 12 6 50.0 % +102 5 30 6 20.0 % +102 2 12 6 50.0 % +102 2 12 6 50.0 % +102 3 18 6 33.3 % +102 3 18 6 33.3 % +102 4 24 6 25.0 % +102 6 36 6 16.7 % +102 6 36 6 16.7 % +103 3 18 6 33.3 % +103 1 6 6 100.0 % +103 7 42 6 14.3 % +103 8 48 6 12.5 % +103 6 36 6 16.7 % +103 3 18 6 33.3 % +103 3 18 6 33.3 % +103 6 36 6 16.7 % +104 3 18 6 33.3 % +104 1 6 6 100.0 % +104 2 12 6 50.0 % +104 2 12 6 50.0 % +104 5 30 6 20.0 % +104 5 30 6 20.0 % +104 5 30 6 20.0 % +104 1 6 6 100.0 % +105 8 48 6 12.5 % +105 2 12 6 50.0 % +105 3 18 6 33.3 % +105 4 24 6 25.0 % +105 6 36 6 16.7 % +105 3 18 6 33.3 % +105 4 24 6 25.0 % +105 2 12 6 50.0 % +106 4 24 6 25.0 % +106 4 24 6 25.0 % +106 4 24 6 25.0 % +106 11 66 6 9.1 % +106 5 30 6 20.0 % +106 3 18 6 33.3 % +106 5 30 6 20.0 % +106 9 54 6 11.1 % +107 6 36 6 16.7 % +107 10 60 6 10.0 % +107 7 42 6 14.3 % +107 5 30 6 20.0 % +107 4 24 6 25.0 % +107 5 30 6 20.0 % +107 2 12 6 50.0 % +107 6 36 6 16.7 % +108 1 6 6 100.0 % +108 1 6 6 100.0 % +108 4 24 6 25.0 % +108 3 18 6 33.3 % +108 11 66 6 9.1 % +108 1 6 6 100.0 % +108 4 24 6 25.0 % +108 1 6 6 100.0 % +109 2 12 6 50.0 % +109 1 6 6 100.0 % +109 2 12 6 50.0 % +109 2 12 6 50.0 % +109 10 60 6 10.0 % +109 5 30 6 20.0 % +109 3 18 6 33.3 % +109 5 30 6 20.0 % +110 2 12 6 50.0 % +110 2 12 6 50.0 % +110 2 12 6 50.0 % +110 1 6 6 100.0 % +110 4 24 6 25.0 % +110 4 24 6 25.0 % +110 9 54 6 11.1 % +110 8 48 6 12.5 % +111 3 18 6 33.3 % +111 2 12 6 50.0 % +111 2 12 6 50.0 % +111 4 24 6 25.0 % +111 4 24 6 25.0 % +111 6 36 6 16.7 % +111 3 18 6 33.3 % +111 4 24 6 25.0 % +112 5 30 6 20.0 % +112 3 18 6 33.3 % +112 3 18 6 33.3 % +112 3 18 6 33.3 % +112 10 60 6 10.0 % +112 1 6 6 100.0 % +112 11 66 6 9.1 % +112 2 12 6 50.0 % +113 1 6 6 100.0 % +113 8 48 6 12.5 % +113 2 12 6 50.0 % +113 1 6 6 100.0 % +113 7 
42 6 14.3 % +113 3 18 6 33.3 % +113 2 12 6 50.0 % +113 7 42 6 14.3 % +114 2 12 6 50.0 % +114 1 6 6 100.0 % +114 3 18 6 33.3 % +114 4 24 6 25.0 % +114 1 6 6 100.0 % +114 2 12 6 50.0 % +114 3 18 6 33.3 % +114 5 30 6 20.0 % +115 1 6 6 100.0 % +115 9 54 6 11.1 % +115 5 30 6 20.0 % +115 4 24 6 25.0 % +115 6 36 6 16.7 % +115 3 18 6 33.3 % +115 6 36 6 16.7 % +115 3 18 6 33.3 % +116 3 18 6 33.3 % +116 3 18 6 33.3 % +116 6 36 6 16.7 % +116 4 24 6 25.0 % +116 2 12 6 50.0 % +116 3 18 6 33.3 % +116 5 30 6 20.0 % +116 1 6 6 100.0 % +117 6 36 6 16.7 % +117 9 54 6 11.1 % +117 7 42 6 14.3 % +117 1 6 6 100.0 % +117 8 48 6 12.5 % +117 2 12 6 50.0 % +117 3 18 6 33.3 % +117 7 42 6 14.3 % +118 2 12 6 50.0 % +118 7 42 6 14.3 % +118 8 48 6 12.5 % +118 5 30 6 20.0 % +118 3 18 6 33.3 % +118 2 12 6 50.0 % +118 1 6 6 100.0 % +118 4 24 6 25.0 % +119 4 24 6 25.0 % +119 3 18 6 33.3 % +119 2 12 6 50.0 % +119 2 12 6 50.0 % +119 2 12 6 50.0 % +119 3 18 6 33.3 % +119 2 12 6 50.0 % +119 3 18 6 33.3 % +120 12 72 6 8.3 % +120 13 78 6 7.7 % +120 2 12 6 50.0 % +120 6 36 6 16.7 % +120 3 18 6 33.3 % +120 1 6 6 100.0 % +120 2 12 6 50.0 % +120 3 18 6 33.3 % +121 2 12 6 50.0 % +121 5 30 6 20.0 % +121 3 18 6 33.3 % +121 2 12 6 50.0 % +121 2 12 6 50.0 % +121 6 36 6 16.7 % +121 4 24 6 25.0 % +121 4 24 6 25.0 % +122 2 12 6 50.0 % +122 1 6 6 100.0 % +122 2 12 6 50.0 % +122 5 30 6 20.0 % +122 4 24 6 25.0 % +122 3 18 6 33.3 % +122 4 24 6 25.0 % +122 4 24 6 25.0 % +123 3 18 6 33.3 % +123 5 30 6 20.0 % +123 3 18 6 33.3 % +123 6 36 6 16.7 % +123 2 12 6 50.0 % +123 2 12 6 50.0 % +123 4 24 6 25.0 % +123 4 24 6 25.0 % +124 3 18 6 33.3 % +124 3 18 6 33.3 % +124 1 6 6 100.0 % +124 4 24 6 25.0 % +124 2 12 6 50.0 % +124 2 12 6 50.0 % +124 1 6 6 100.0 % +124 5 30 6 20.0 % +125 2 12 6 50.0 % +125 4 24 6 25.0 % +125 6 36 6 16.7 % +125 4 24 6 25.0 % +125 1 6 6 100.0 % +125 4 24 6 25.0 % +125 1 6 6 100.0 % +125 3 18 6 33.3 % +126 1 6 6 100.0 % +126 9 54 6 11.1 % +126 7 42 6 14.3 % +126 3 18 6 33.3 % +126 8 48 6 12.5 % +126 3 18 6 
33.3 % +126 2 12 6 50.0 % +126 6 36 6 16.7 % +127 4 24 6 25.0 % +127 10 60 6 10.0 % +127 3 18 6 33.3 % +127 1 6 6 100.0 % +127 4 24 6 25.0 % +127 4 24 6 25.0 % +127 1 6 6 100.0 % +127 1 6 6 100.0 % +128 1 6 6 100.0 % +128 4 24 6 25.0 % +128 2 12 6 50.0 % +128 2 12 6 50.0 % +128 1 6 6 100.0 % +128 2 12 6 50.0 % +128 2 12 6 50.0 % +128 1 6 6 100.0 % +129 2 12 6 50.0 % +129 3 18 6 33.3 % +129 3 18 6 33.3 % +129 1 6 6 100.0 % +129 7 42 6 14.3 % +129 3 18 6 33.3 % +129 5 30 6 20.0 % +129 3 18 6 33.3 % +130 3 18 6 33.3 % +130 2 12 6 50.0 % +130 5 30 6 20.0 % +130 3 18 6 33.3 % +130 4 24 6 25.0 % +130 7 42 6 14.3 % +130 2 12 6 50.0 % +130 3 18 6 33.3 % +131 3 18 6 33.3 % +131 6 36 6 16.7 % +131 6 36 6 16.7 % +131 2 12 6 50.0 % +131 2 12 6 50.0 % +131 1 6 6 100.0 % +131 4 24 6 25.0 % +131 6 36 6 16.7 % +132 2 12 6 50.0 % +132 3 18 6 33.3 % +132 4 24 6 25.0 % +132 1 6 6 100.0 % +132 1 6 6 100.0 % +132 5 30 6 20.0 % +132 4 24 6 25.0 % +132 3 18 6 33.3 % +133 4 24 6 25.0 % +133 4 24 6 25.0 % +133 3 18 6 33.3 % +133 3 18 6 33.3 % +133 1 6 6 100.0 % +133 1 6 6 100.0 % +133 4 24 6 25.0 % +133 4 24 6 25.0 % +134 4 24 6 25.0 % +134 2 12 6 50.0 % +134 3 18 6 33.3 % +134 7 42 6 14.3 % +134 3 18 6 33.3 % +134 3 18 6 33.3 % +134 4 24 6 25.0 % +134 2 12 6 50.0 % +135 2 12 6 50.0 % +135 1 6 6 100.0 % +135 1 6 6 100.0 % +135 2 12 6 50.0 % +135 10 60 6 10.0 % +135 4 24 6 25.0 % +135 2 12 6 50.0 % +135 11 66 6 9.1 % +136 1 6 6 100.0 % +136 9 54 6 11.1 % +136 9 54 6 11.1 % +136 3 18 6 33.3 % +136 10 60 6 10.0 % +136 2 12 6 50.0 % +136 4 24 6 25.0 % +136 4 24 6 25.0 % +137 8 48 6 12.5 % +137 1 6 6 100.0 % +137 2 12 6 50.0 % +137 2 12 6 50.0 % +137 8 48 6 12.5 % +137 2 12 6 50.0 % +137 4 24 6 25.0 % +137 3 18 6 33.3 % +138 6 36 6 16.7 % +138 5 30 6 20.0 % +138 4 24 6 25.0 % +138 4 24 6 25.0 % +138 1 6 6 100.0 % +138 1 6 6 100.0 % +138 4 24 6 25.0 % +138 9 54 6 11.1 % +139 2 12 6 50.0 % +139 4 24 6 25.0 % +139 2 12 6 50.0 % +139 5 30 6 20.0 % +139 3 18 6 33.3 % +139 6 36 6 16.7 % +139 3 18 6 
33.3 % +139 4 24 6 25.0 % +140 6 36 6 16.7 % +140 2 12 6 50.0 % +140 6 36 6 16.7 % +140 3 18 6 33.3 % +140 7 42 6 14.3 % +140 5 30 6 20.0 % +140 3 18 6 33.3 % +140 7 42 6 14.3 % +141 3 18 6 33.3 % +141 5 30 6 20.0 % +141 1 6 6 100.0 % +141 3 18 6 33.3 % +141 11 66 6 9.1 % +141 3 18 6 33.3 % +141 7 42 6 14.3 % +141 2 12 6 50.0 % +142 2 12 6 50.0 % +142 2 12 6 50.0 % +142 6 36 6 16.7 % +142 2 12 6 50.0 % +142 7 42 6 14.3 % +142 5 30 6 20.0 % +142 4 24 6 25.0 % +142 3 18 6 33.3 % +143 1 6 6 100.0 % +143 3 18 6 33.3 % +143 7 42 6 14.3 % +143 2 12 6 50.0 % +143 2 12 6 50.0 % +143 10 60 6 10.0 % +143 3 18 6 33.3 % +143 1 6 6 100.0 % +144 4 24 6 25.0 % +144 3 18 6 33.3 % +144 2 12 6 50.0 % +144 5 30 6 20.0 % +144 7 42 6 14.3 % +144 5 30 6 20.0 % +144 9 54 6 11.1 % +144 2 12 6 50.0 % +145 1 6 6 100.0 % +145 4 24 6 25.0 % +145 4 24 6 25.0 % +145 1 6 6 100.0 % +145 1 6 6 100.0 % +145 1 6 6 100.0 % +145 5 30 6 20.0 % +145 5 30 6 20.0 % +146 1 6 6 100.0 % +146 4 24 6 25.0 % +146 3 18 6 33.3 % +146 2 12 6 50.0 % +146 2 12 6 50.0 % +146 4 24 6 25.0 % +146 3 18 6 33.3 % +146 9 54 6 11.1 % +147 5 30 6 20.0 % +147 2 12 6 50.0 % +147 2 12 6 50.0 % +147 3 18 6 33.3 % +147 4 24 6 25.0 % +147 1 6 6 100.0 % +147 4 24 6 25.0 % +147 1 6 6 100.0 % +148 4 24 6 25.0 % +148 2 12 6 50.0 % +148 4 24 6 25.0 % +148 7 42 6 14.3 % +148 1 6 6 100.0 % +148 3 18 6 33.3 % +148 6 36 6 16.7 % +148 6 36 6 16.7 % +149 6 36 6 16.7 % +149 6 36 6 16.7 % +149 5 30 6 20.0 % +149 8 48 6 12.5 % +149 1 6 6 100.0 % +149 3 18 6 33.3 % +149 3 18 6 33.3 % +149 4 24 6 25.0 % +150 6 36 6 16.7 % +150 5 30 6 20.0 % +150 5 30 6 20.0 % +150 5 30 6 20.0 % +150 6 36 6 16.7 % +150 2 12 6 50.0 % +150 3 18 6 33.3 % +150 1 6 6 100.0 % +151 2 12 6 50.0 % +151 3 18 6 33.3 % +151 2 12 6 50.0 % +151 3 18 6 33.3 % +151 1 6 6 100.0 % +151 3 18 6 33.3 % +151 3 18 6 33.3 % +151 5 30 6 20.0 % +152 2 12 6 50.0 % +152 1 6 6 100.0 % +152 1 6 6 100.0 % +152 3 18 6 33.3 % +152 2 12 6 50.0 % +152 2 12 6 50.0 % +152 4 24 6 25.0 % +152 2 12 6 
50.0 % +153 4 24 6 25.0 % +153 1 6 6 100.0 % +153 2 12 6 50.0 % +153 4 24 6 25.0 % +153 9 54 6 11.1 % +153 2 12 6 50.0 % +153 1 6 6 100.0 % +153 8 48 6 12.5 % +154 6 36 6 16.7 % +154 4 24 6 25.0 % +154 1 6 6 100.0 % +154 6 36 6 16.7 % +154 2 12 6 50.0 % +154 3 18 6 33.3 % +154 4 24 6 25.0 % +154 2 12 6 50.0 % +155 7 42 6 14.3 % +155 2 12 6 50.0 % +155 3 18 6 33.3 % +155 2 12 6 50.0 % +155 2 12 6 50.0 % +155 3 18 6 33.3 % +155 13 78 6 7.7 % +155 2 12 6 50.0 % +156 2 12 6 50.0 % +156 4 24 6 25.0 % +156 3 18 6 33.3 % +156 3 18 6 33.3 % +156 3 18 6 33.3 % +156 5 30 6 20.0 % +156 2 12 6 50.0 % +156 2 12 6 50.0 % +157 3 18 6 33.3 % +157 5 30 6 20.0 % +157 7 42 6 14.3 % +157 5 30 6 20.0 % +157 6 36 6 16.7 % +157 4 24 6 25.0 % +157 2 12 6 50.0 % +157 3 18 6 33.3 % +158 9 54 6 11.1 % +158 2 12 6 50.0 % +158 2 12 6 50.0 % +158 4 24 6 25.0 % +158 3 18 6 33.3 % +150 2 12 6 50.0 % +150 6 36 6 16.7 % +150 4 24 6 25.0 % +150 7 42 6 14.3 % +150 3 18 6 33.3 % +150 3 18 6 33.3 % +150 1 6 6 100.0 % +150 2 12 6 50.0 % +151 2 12 6 50.0 % +151 3 18 6 33.3 % +151 4 24 6 25.0 % +151 1 6 6 100.0 % +151 8 48 6 12.5 % +151 2 12 6 50.0 % +151 3 18 6 33.3 % +151 9 54 6 11.1 % +152 4 24 6 25.0 % +152 2 12 6 50.0 % +152 8 48 6 12.5 % +152 3 18 6 33.3 % +152 2 12 6 50.0 % +152 3 18 6 33.3 % +152 2 12 6 50.0 % +152 3 18 6 33.3 % +153 6 36 6 16.7 % +153 4 24 6 25.0 % +153 5 30 6 20.0 % +153 3 18 6 33.3 % +153 3 18 6 33.3 % +153 4 24 6 25.0 % +153 4 24 6 25.0 % +153 1 6 6 100.0 % +154 6 36 6 16.7 % +154 3 18 6 33.3 % +154 2 12 6 50.0 % +154 1 6 6 100.0 % +154 2 12 6 50.0 % +154 5 30 6 20.0 % +154 2 12 6 50.0 % +154 2 12 6 50.0 % +155 3 18 6 33.3 % +155 4 24 6 25.0 % +155 10 60 6 10.0 % +155 8 48 6 12.5 % +155 5 30 6 20.0 % +155 3 18 6 33.3 % +155 8 48 6 12.5 % +155 6 36 6 16.7 % +156 3 18 6 33.3 % +156 3 18 6 33.3 % +156 6 36 6 16.7 % +156 3 18 6 33.3 % +156 1 6 6 100.0 % +156 3 18 6 33.3 % +156 1 6 6 100.0 % +156 4 24 6 25.0 % +157 5 30 6 20.0 % +157 3 18 6 33.3 % +157 4 24 6 25.0 % +157 9 54 6 
11.1 % +157 4 24 6 25.0 % +157 4 24 6 25.0 % +157 6 36 6 16.7 % +157 3 18 6 33.3 % +158 3 18 6 33.3 % +158 2 12 6 50.0 % +158 5 30 6 20.0 % +158 4 24 6 25.0 % +158 4 24 6 25.0 % +158 11 66 6 9.1 % +158 5 30 6 20.0 % +158 3 18 6 33.3 % +159 8 48 6 12.5 % +159 6 36 6 16.7 % +159 6 36 6 16.7 % +159 2 12 6 50.0 % +159 8 48 6 12.5 % +159 4 24 6 25.0 % +159 4 24 6 25.0 % +159 4 24 6 25.0 % +160 4 24 6 25.0 % +160 6 36 6 16.7 % +160 1 6 6 100.0 % +160 6 36 6 16.7 % +160 2 12 6 50.0 % +160 5 30 6 20.0 % +160 2 12 6 50.0 % +160 8 48 6 12.5 % +161 4 24 6 25.0 % +161 4 24 6 25.0 % +161 2 12 6 50.0 % +161 3 18 6 33.3 % +161 1 6 6 100.0 % +161 2 12 6 50.0 % +161 10 60 6 10.0 % +161 5 30 6 20.0 % +162 3 18 6 33.3 % +162 6 36 6 16.7 % +162 1 6 6 100.0 % +162 3 18 6 33.3 % +162 1 6 6 100.0 % +162 1 6 6 100.0 % +162 4 24 6 25.0 % +162 4 24 6 25.0 % +163 5 30 6 20.0 % +163 4 24 6 25.0 % +163 6 36 6 16.7 % +163 2 12 6 50.0 % +163 3 18 6 33.3 % +163 2 12 6 50.0 % +163 2 12 6 50.0 % +163 6 36 6 16.7 % +164 2 12 6 50.0 % +164 6 36 6 16.7 % +164 7 42 6 14.3 % +164 5 30 6 20.0 % +164 1 6 6 100.0 % +164 5 30 6 20.0 % +164 3 18 6 33.3 % +164 5 30 6 20.0 % +165 5 30 6 20.0 % +165 7 42 6 14.3 % +165 5 30 6 20.0 % +165 1 6 6 100.0 % +165 2 12 6 50.0 % +165 8 48 6 12.5 % +165 3 18 6 33.3 % +165 10 60 6 10.0 % +166 2 12 6 50.0 % +166 7 42 6 14.3 % +166 3 18 6 33.3 % +166 3 18 6 33.3 % +166 4 24 6 25.0 % +166 3 18 6 33.3 % +166 3 18 6 33.3 % +166 5 30 6 20.0 % +167 1 6 6 100.0 % +167 9 54 6 11.1 % +167 5 30 6 20.0 % +167 4 24 6 25.0 % +167 6 36 6 16.7 % +167 1 6 6 100.0 % +167 4 24 6 25.0 % +167 6 36 6 16.7 % +168 2 12 6 50.0 % +168 5 30 6 20.0 % +168 3 18 6 33.3 % +168 4 24 6 25.0 % +168 3 18 6 33.3 % +168 2 12 6 50.0 % +168 1 6 6 100.0 % +168 2 12 6 50.0 % +169 5 30 6 20.0 % +169 1 6 6 100.0 % +169 6 36 6 16.7 % +169 2 12 6 50.0 % +169 8 48 6 12.5 % +169 6 36 6 16.7 % +169 3 18 6 33.3 % +169 6 36 6 16.7 % +170 2 12 6 50.0 % +170 5 30 6 20.0 % +170 5 30 6 20.0 % +170 2 12 6 50.0 % +170 5 30 6 
20.0 % +170 2 12 6 50.0 % +170 8 48 6 12.5 % +170 5 30 6 20.0 % +171 2 12 6 50.0 % +171 2 12 6 50.0 % +171 2 12 6 50.0 % +171 7 42 6 14.3 % +171 1 6 6 100.0 % +171 4 24 6 25.0 % +171 1 6 6 100.0 % +171 3 18 6 33.3 % +172 3 18 6 33.3 % +172 2 12 6 50.0 % +172 1 6 6 100.0 % +172 3 18 6 33.3 % +172 12 72 6 8.3 % +172 8 48 6 12.5 % +172 6 36 6 16.7 % +172 2 12 6 50.0 % +173 5 30 6 20.0 % +173 3 18 6 33.3 % +173 1 6 6 100.0 % +173 2 12 6 50.0 % +173 2 12 6 50.0 % +173 3 18 6 33.3 % +173 4 24 6 25.0 % +173 1 6 6 100.0 % +174 5 30 6 20.0 % +174 2 12 6 50.0 % +174 3 18 6 33.3 % +174 4 24 6 25.0 % +174 6 36 6 16.7 % +174 3 18 6 33.3 % +174 1 6 6 100.0 % +174 2 12 6 50.0 % +175 5 30 6 20.0 % +175 4 24 6 25.0 % +175 4 24 6 25.0 % +175 3 18 6 33.3 % +175 3 18 6 33.3 % +175 2 12 6 50.0 % +175 7 42 6 14.3 % +175 3 18 6 33.3 % +176 6 36 6 16.7 % +176 1 6 6 100.0 % +176 3 18 6 33.3 % +176 4 24 6 25.0 % +176 6 36 6 16.7 % +176 3 18 6 33.3 % +176 2 12 6 50.0 % +176 4 24 6 25.0 % +177 3 18 6 33.3 % +177 2 12 6 50.0 % +177 5 30 6 20.0 % +177 2 12 6 50.0 % +177 4 24 6 25.0 % +177 7 42 6 14.3 % +177 3 18 6 33.3 % +177 2 12 6 50.0 % +178 4 24 6 25.0 % +178 1 6 6 100.0 % +178 8 48 6 12.5 % +178 8 48 6 12.5 % +178 3 18 6 33.3 % +178 3 18 6 33.3 % +178 8 48 6 12.5 % +178 3 18 6 33.3 % +179 7 42 6 14.3 % +179 5 30 6 20.0 % +179 10 60 6 10.0 % +179 3 18 6 33.3 % +179 1 6 6 100.0 % +179 6 36 6 16.7 % +179 3 18 6 33.3 % +179 1 6 6 100.0 % +180 1 6 6 100.0 % +180 4 24 6 25.0 % +180 2 12 6 50.0 % +180 2 12 6 50.0 % +180 2 12 6 50.0 % +180 2 12 6 50.0 % +180 2 12 6 50.0 % +180 3 18 6 33.3 % +181 4 24 6 25.0 % +181 2 12 6 50.0 % +181 6 36 6 16.7 % +181 1 6 6 100.0 % +181 3 18 6 33.3 % +181 5 30 6 20.0 % +181 3 18 6 33.3 % +181 2 12 6 50.0 % +182 1 6 6 100.0 % +182 2 12 6 50.0 % +182 2 12 6 50.0 % +182 4 24 6 25.0 % +182 2 12 6 50.0 % +182 4 24 6 25.0 % +182 1 6 6 100.0 % +182 6 36 6 16.7 % +183 3 18 6 33.3 % +183 2 12 6 50.0 % +183 3 18 6 33.3 % +183 2 12 6 50.0 % +183 9 54 6 11.1 % +183 2 12 6 
50.0 % +183 2 12 6 50.0 % +183 2 12 6 50.0 % +184 1 6 6 100.0 % +184 9 54 6 11.1 % +184 2 12 6 50.0 % +184 3 18 6 33.3 % +184 4 24 6 25.0 % +184 1 6 6 100.0 % +184 3 18 6 33.3 % +184 4 24 6 25.0 % +185 6 36 6 16.7 % +185 4 24 6 25.0 % +185 7 42 6 14.3 % +185 3 18 6 33.3 % +185 1 6 6 100.0 % +185 5 30 6 20.0 % +185 4 24 6 25.0 % +185 1 6 6 100.0 % +186 4 24 6 25.0 % +186 4 24 6 25.0 % +186 6 36 6 16.7 % +186 2 12 6 50.0 % +186 2 12 6 50.0 % +186 3 18 6 33.3 % +186 4 24 6 25.0 % +186 4 24 6 25.0 % +187 1 6 6 100.0 % +187 1 6 6 100.0 % +187 2 12 6 50.0 % +187 10 60 6 10.0 % +187 4 24 6 25.0 % +187 2 12 6 50.0 % +187 11 66 6 9.1 % +187 1 6 6 100.0 % +188 5 30 6 20.0 % +188 6 36 6 16.7 % +188 6 36 6 16.7 % +188 3 18 6 33.3 % +188 5 30 6 20.0 % +188 7 42 6 14.3 % +188 2 12 6 50.0 % +188 7 42 6 14.3 % +189 8 48 6 12.5 % +189 1 6 6 100.0 % +189 2 12 6 50.0 % +189 2 12 6 50.0 % +189 4 24 6 25.0 % +189 4 24 6 25.0 % +189 1 6 6 100.0 % +189 3 18 6 33.3 % +190 3 18 6 33.3 % +190 5 30 6 20.0 % +190 8 48 6 12.5 % +190 4 24 6 25.0 % +190 4 24 6 25.0 % +190 1 6 6 100.0 % +190 1 6 6 100.0 % +190 4 24 6 25.0 % +191 9 54 6 11.1 % +191 4 24 6 25.0 % +191 4 24 6 25.0 % +191 5 30 6 20.0 % +191 3 18 6 33.3 % +191 6 36 6 16.7 % +191 1 6 6 100.0 % +191 5 30 6 20.0 % +192 3 18 6 33.3 % +192 4 24 6 25.0 % +192 2 12 6 50.0 % +192 6 36 6 16.7 % +192 3 18 6 33.3 % +192 7 42 6 14.3 % +192 4 24 6 25.0 % +192 1 6 6 100.0 % +193 3 18 6 33.3 % +193 9 54 6 11.1 % +193 3 18 6 33.3 % +193 3 18 6 33.3 % +193 1 6 6 100.0 % +193 3 18 6 33.3 % +193 8 48 6 12.5 % +193 3 18 6 33.3 % +194 3 18 6 33.3 % +194 7 42 6 14.3 % +194 2 12 6 50.0 % +194 1 6 6 100.0 % +194 1 6 6 100.0 % +194 2 12 6 50.0 % +194 8 48 6 12.5 % +194 7 42 6 14.3 % +195 5 30 6 20.0 % +195 6 36 6 16.7 % +195 1 6 6 100.0 % +195 1 6 6 100.0 % +195 10 60 6 10.0 % +195 2 12 6 50.0 % +195 3 18 6 33.3 % +195 10 60 6 10.0 % +196 2 12 6 50.0 % +196 5 30 6 20.0 % +196 4 24 6 25.0 % +196 6 36 6 16.7 % +196 7 42 6 14.3 % +196 3 18 6 33.3 % +196 2 12 6 
50.0 % +196 9 54 6 11.1 % +197 2 12 6 50.0 % +197 1 6 6 100.0 % +197 6 36 6 16.7 % +197 3 18 6 33.3 % +197 2 12 6 50.0 % +197 8 48 6 12.5 % +197 2 12 6 50.0 % +197 2 12 6 50.0 % +198 6 36 6 16.7 % +198 1 6 6 100.0 % +198 1 6 6 100.0 % +198 2 12 6 50.0 % +198 1 6 6 100.0 % +198 4 24 6 25.0 % +198 2 12 6 50.0 % +198 9 54 6 11.1 % +199 1 6 6 100.0 % +199 4 24 6 25.0 % +199 4 24 6 25.0 % +199 3 18 6 33.3 % +199 4 24 6 25.0 % +199 1 6 6 100.0 % +199 4 24 6 25.0 % +199 3 18 6 33.3 % diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..17bba0d --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "temperature": 0.6, + "top_p": 0.95, + "transformers_version": "4.57.1" +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..cc63ce9 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98ccc71896f8d70f009f72e8e6eb3ab2757304080fe0eba8fe6363231250d54c +size 4976698672 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..bafb134 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df560d0d310ddc87a51744f6692772f20a5635dd7793e77652e80f7e954ce32 +size 4999802720 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..7b51a2f --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05011c5891c4cc463e5f7c6fc38d4e4af1ebcb00b4d0bac2766f2a553c7c7102 +size 4915916176 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..f70cb83 --- /dev/null +++ 
b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edcdb94b82a8210d7a1dc489519883594d99d87a7f96573192d97ebf57c81c62 +size 1168138808 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..5c64f1e --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 8030261248, + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + 
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + 
"model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..1d385d6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..4f47952 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91915040cfac999d8c55f4b5bc6e67367c065e3a7a4e4b9438ce1f256addd86 +size 17209530 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..dd34db6 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "128000": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128012": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128015": { + "content": "<|▁pad▁|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, 
+ "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": 
"<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": 
"<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": 
"<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..db68460 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "total_flos": 0.0, + "train_loss": 0.009447227440541611, + "train_runtime": 101500.2967, + "train_samples": 7000, + "train_samples_per_second": 0.095, + "train_steps_per_second": 0.002 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..ccd3221 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,3843 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.22857142857142856, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_fraction": 0.0, + "completion_length": 2124.791679382324, + "dapo/avg_reward_std": 0.28261276125907897, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.42666667342185977, + "dapo/num_sampling_attempts": 3.125, + "dapo/sampling_efficiency": 45.83333333333333, + "dapo/total_prompts_processed": 18.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.001142857142857143, + "grad_norm": 0.03718917816877365, + "kl": 0.0, + "learning_rate": 0.0, + "loss": -0.0465, + "reward": 0.6372265852987766, + "reward_std": 
0.9629172012209892, + "step": 1 + }, + { + "clip_fraction": 0.0, + "completion_length": 2559.6631774902344, + "dapo/avg_reward_std": 0.2737089714833668, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.39285715403301374, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 32.291666666666664, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.002285714285714286, + "grad_norm": 0.031548872590065, + "kl": 0.0, + "learning_rate": 1e-07, + "loss": 0.0292, + "reward": 0.2883484517224133, + "reward_std": 0.9225177392363548, + "step": 2 + }, + { + "clip_fraction": 0.0, + "completion_length": 2259.0243072509766, + "dapo/avg_reward_std": 0.30627372419392623, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.40740741734151487, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 38.33333333333333, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.0034285714285714284, + "grad_norm": 0.028476394712924957, + "kl": 3.738701343536377e-05, + "learning_rate": 2e-07, + "loss": 0.0118, + "reward": 0.5692771524190903, + "reward_std": 0.9722258150577545, + "step": 3 + }, + { + "clip_fraction": 0.0, + "completion_length": 2388.763916015625, + "dapo/avg_reward_std": 0.2417103610932827, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.34895834093913436, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 29.479166666666664, + "dapo/total_prompts_processed": 24.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.004571428571428572, + "grad_norm": 0.03074878267943859, + "kl": 3.4555792808532715e-05, + "learning_rate": 3e-07, + "loss": 0.0428, + "reward": 0.5176859218627214, + "reward_std": 0.9351213574409485, + "step": 4 + }, + { + "clip_fraction": 0.0, + "completion_length": 2228.9131927490234, + "dapo/avg_reward_std": 0.24784977205338016, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 
0.3494623731220922, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 34.375, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.005714285714285714, + "grad_norm": 0.03052515536546707, + "kl": 4.2438507080078125e-05, + "learning_rate": 4e-07, + "loss": 0.0573, + "reward": 0.5747799873352051, + "reward_std": 0.9150463417172432, + "step": 5 + }, + { + "clip_fraction": 0.0, + "completion_length": 2526.2743377685547, + "dapo/avg_reward_std": 0.31032066589052026, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4772727367552844, + "dapo/num_sampling_attempts": 2.75, + "dapo/sampling_efficiency": 39.58333333333333, + "dapo/total_prompts_processed": 16.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.006857142857142857, + "grad_norm": 0.031065233051776886, + "kl": 6.331503391265869e-05, + "learning_rate": 5e-07, + "loss": 0.068, + "reward": 0.49577395524829626, + "reward_std": 0.9604900777339935, + "step": 6 + }, + { + "clip_fraction": 0.0, + "completion_length": 2096.857650756836, + "dapo/avg_reward_std": 0.30248596491637053, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.43827161303272955, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 33.33333333333333, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.008, + "grad_norm": 0.03395611792802811, + "kl": 3.603100776672363e-05, + "learning_rate": 6e-07, + "loss": 0.0104, + "reward": 0.6337036956101656, + "reward_std": 0.9339632987976074, + "step": 7 + }, + { + "clip_fraction": 0.0, + "completion_length": 2080.482681274414, + "dapo/avg_reward_std": 0.2619025791063905, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3489583395421505, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 27.82738095238095, + "dapo/total_prompts_processed": 24.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.009142857142857144, + "grad_norm": 
0.030713744461536407, + "kl": 3.699958324432373e-05, + "learning_rate": 7e-07, + "loss": 0.0191, + "reward": 0.5047293808311224, + "reward_std": 0.9456561654806137, + "step": 8 + }, + { + "clip_fraction": 0.0, + "completion_length": 2575.715316772461, + "dapo/avg_reward_std": 0.26183396059533826, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4275362387947414, + "dapo/num_sampling_attempts": 2.875, + "dapo/sampling_efficiency": 56.25, + "dapo/total_prompts_processed": 17.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.010285714285714285, + "grad_norm": 0.02862783893942833, + "kl": 3.787875175476074e-05, + "learning_rate": 8e-07, + "loss": 0.0251, + "reward": 0.49641977716237307, + "reward_std": 0.9346907436847687, + "step": 9 + }, + { + "clip_fraction": 0.0, + "completion_length": 2574.7951431274414, + "dapo/avg_reward_std": 0.2888991279261453, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.46031746694019865, + "dapo/num_sampling_attempts": 2.625, + "dapo/sampling_efficiency": 61.875, + "dapo/total_prompts_processed": 15.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.011428571428571429, + "grad_norm": 0.03313002362847328, + "kl": 2.9653310775756836e-05, + "learning_rate": 9e-07, + "loss": 0.0131, + "reward": 0.6514056231826544, + "reward_std": 0.9486276879906654, + "step": 10 + }, + { + "clip_fraction": 0.0, + "completion_length": 2648.3541870117188, + "dapo/avg_reward_std": 0.1985154973136054, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.23333333631356556, + "dapo/num_sampling_attempts": 5.625, + "dapo/sampling_efficiency": 22.747252747252745, + "dapo/total_prompts_processed": 33.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.012571428571428572, + "grad_norm": 0.02842891961336136, + "kl": 4.6372413635253906e-05, + "learning_rate": 1e-06, + "loss": 0.0228, + "reward": 0.3831507060676813, + "reward_std": 0.9138674512505531, + "step": 11 + }, + { + "clip_fraction": 0.0, + 
"completion_length": 2340.7708435058594, + "dapo/avg_reward_std": 0.21896107792854308, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.25000000558793545, + "dapo/num_sampling_attempts": 5.0, + "dapo/sampling_efficiency": 29.791666666666664, + "dapo/total_prompts_processed": 30.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.013714285714285714, + "grad_norm": 0.02896970883011818, + "kl": 3.764033317565918e-05, + "learning_rate": 9.997258721585931e-07, + "loss": 0.0141, + "reward": 0.3742078524082899, + "reward_std": 0.9111683145165443, + "step": 12 + }, + { + "clip_fraction": 0.0, + "completion_length": 2731.9687576293945, + "dapo/avg_reward_std": 0.2593883651274222, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.39506174016881873, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 43.95833333333333, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.014857142857142857, + "grad_norm": 0.028494343161582947, + "kl": 4.1812658309936523e-05, + "learning_rate": 9.989038226169207e-07, + "loss": 0.0482, + "reward": 0.37119605229236186, + "reward_std": 0.9484475553035736, + "step": 13 + }, + { + "clip_fraction": 0.0, + "completion_length": 2346.684066772461, + "dapo/avg_reward_std": 0.2633256334247011, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3787878860126842, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 40.416666666666664, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.016, + "grad_norm": 0.03419339284300804, + "kl": 3.219395875930786e-05, + "learning_rate": 9.975348529157229e-07, + "loss": 0.0443, + "reward": 0.5307169873267412, + "reward_std": 0.8819384500384331, + "step": 14 + }, + { + "clip_fraction": 0.0, + "completion_length": 2438.8437881469727, + "dapo/avg_reward_std": 0.31698794450078693, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 
0.48412699571677614, + "dapo/num_sampling_attempts": 2.625, + "dapo/sampling_efficiency": 49.99999999999999, + "dapo/total_prompts_processed": 15.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.017142857142857144, + "grad_norm": 0.03230522945523262, + "kl": 3.4749507904052734e-05, + "learning_rate": 9.956206309337066e-07, + "loss": 0.0519, + "reward": 0.6968788839876652, + "reward_std": 0.9826493486762047, + "step": 15 + }, + { + "clip_fraction": 0.0, + "completion_length": 2835.3125076293945, + "dapo/avg_reward_std": 0.2820873036980629, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.36111111876865226, + "dapo/num_sampling_attempts": 3.0, + "dapo/sampling_efficiency": 49.375, + "dapo/total_prompts_processed": 18.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.018285714285714287, + "grad_norm": 0.026719439774751663, + "kl": 3.375113010406494e-05, + "learning_rate": 9.931634888554935e-07, + "loss": 0.0158, + "reward": 0.4585288055241108, + "reward_std": 0.9621468484401703, + "step": 16 + }, + { + "clip_fraction": 0.0, + "completion_length": 2489.513870239258, + "dapo/avg_reward_std": 0.24821309347947437, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.35000000447034835, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 51.25, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.019428571428571427, + "grad_norm": 0.030841730535030365, + "kl": 3.2588839530944824e-05, + "learning_rate": 9.901664203302124e-07, + "loss": 0.0342, + "reward": 0.4615583084523678, + "reward_std": 0.8882262408733368, + "step": 17 + }, + { + "clip_fraction": 0.0, + "completion_length": 2291.8854217529297, + "dapo/avg_reward_std": 0.3492339625954628, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4000000149011612, + "dapo/num_sampling_attempts": 2.5, + "dapo/sampling_efficiency": 46.87499999999999, + "dapo/total_prompts_processed": 15.0, + 
"dapo/valid_prompts_collected": 6.0, + "epoch": 0.02057142857142857, + "grad_norm": 0.4981432557106018, + "kl": 4.331767559051514e-05, + "learning_rate": 9.866330768241983e-07, + "loss": 0.0782, + "reward": 0.5650830613449216, + "reward_std": 0.960162565112114, + "step": 18 + }, + { + "clip_fraction": 0.0, + "completion_length": 1727.9479217529297, + "dapo/avg_reward_std": 0.2201171379822951, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2863247940937678, + "dapo/num_sampling_attempts": 4.875, + "dapo/sampling_efficiency": 27.01388888888889, + "dapo/total_prompts_processed": 29.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.021714285714285714, + "grad_norm": 0.034473638981580734, + "kl": 2.7894973754882812e-05, + "learning_rate": 9.825677631722435e-07, + "loss": -0.0027, + "reward": 0.5283844769001007, + "reward_std": 0.9302913695573807, + "step": 19 + }, + { + "clip_fraction": 0.0, + "completion_length": 1848.9062576293945, + "dapo/avg_reward_std": 0.2080523163983316, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3030303070942561, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 40.74404761904762, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.022857142857142857, + "grad_norm": 0.03650596737861633, + "kl": 2.997368574142456e-05, + "learning_rate": 9.779754323328192e-07, + "loss": 0.0066, + "reward": 0.47246094793081284, + "reward_std": 0.925552561879158, + "step": 20 + }, + { + "clip_fraction": 0.0, + "completion_length": 2310.6354370117188, + "dapo/avg_reward_std": 0.18431008011102676, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.26250000260770323, + "dapo/num_sampling_attempts": 5.0, + "dapo/sampling_efficiency": 32.53472222222222, + "dapo/total_prompts_processed": 30.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.024, + "grad_norm": 0.02872428111732006, + "kl": 3.707408905029297e-05, + "learning_rate": 
9.728616793536587e-07, + "loss": 0.0041, + "reward": 0.5466808546334505, + "reward_std": 0.9614025354385376, + "step": 21 + }, + { + "clip_fraction": 0.0, + "completion_length": 2628.4618072509766, + "dapo/avg_reward_std": 0.27239492272629456, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3235294157091309, + "dapo/num_sampling_attempts": 4.25, + "dapo/sampling_efficiency": 26.875, + "dapo/total_prompts_processed": 25.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.025142857142857144, + "grad_norm": 0.03156612813472748, + "kl": 4.024803638458252e-05, + "learning_rate": 9.672327345550543e-07, + "loss": 0.0396, + "reward": 0.4231120813637972, + "reward_std": 0.9312948659062386, + "step": 22 + }, + { + "clip_fraction": 0.0, + "completion_length": 2495.7673873901367, + "dapo/avg_reward_std": 0.30711027341229574, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3988095335662365, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 31.249999999999993, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.026285714285714287, + "grad_norm": 0.028224533423781395, + "kl": 3.413856029510498e-05, + "learning_rate": 9.610954559391704e-07, + "loss": 0.0195, + "reward": 0.5285261562094092, + "reward_std": 0.9373103529214859, + "step": 23 + }, + { + "clip_fraction": 0.0, + "completion_length": 1944.9201278686523, + "dapo/avg_reward_std": 0.29968351125717163, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4533333480358124, + "dapo/num_sampling_attempts": 3.125, + "dapo/sampling_efficiency": 44.27083333333333, + "dapo/total_prompts_processed": 18.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.027428571428571427, + "grad_norm": 0.03633056953549385, + "kl": 3.1538307666778564e-05, + "learning_rate": 9.54457320834625e-07, + "loss": 0.0693, + "reward": 0.5397752095013857, + "reward_std": 0.9495814517140388, + "step": 24 + }, + { + "clip_fraction": 0.0, + 
"completion_length": 2616.593780517578, + "dapo/avg_reward_std": 0.16712580593127124, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.19811321232678755, + "dapo/num_sampling_attempts": 6.625, + "dapo/sampling_efficiency": 19.166666666666664, + "dapo/total_prompts_processed": 39.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.02857142857142857, + "grad_norm": 0.024344539269804955, + "kl": 3.676116466522217e-05, + "learning_rate": 9.473264167865171e-07, + "loss": 0.0139, + "reward": 0.3185653127729893, + "reward_std": 0.9151088818907738, + "step": 25 + }, + { + "clip_fraction": 0.0, + "completion_length": 2116.7257232666016, + "dapo/avg_reward_std": 0.27600910129218265, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33908046319566926, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 44.6875, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.029714285714285714, + "grad_norm": 0.031155193224549294, + "kl": 3.579258918762207e-05, + "learning_rate": 9.397114317029974e-07, + "loss": 0.0725, + "reward": 0.5197067707777023, + "reward_std": 0.8911866471171379, + "step": 26 + }, + { + "clip_fraction": 0.0, + "completion_length": 2148.781265258789, + "dapo/avg_reward_std": 0.24896243140101432, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.31666667349636557, + "dapo/num_sampling_attempts": 5.0, + "dapo/sampling_efficiency": 22.63888888888889, + "dapo/total_prompts_processed": 30.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.030857142857142857, + "grad_norm": 0.03762076795101166, + "kl": 3.104656934738159e-05, + "learning_rate": 9.316216432703916e-07, + "loss": -0.0333, + "reward": 0.5081147998571396, + "reward_std": 0.9414060413837433, + "step": 27 + }, + { + "clip_fraction": 0.0, + "completion_length": 2357.4062881469727, + "dapo/avg_reward_std": 0.22747237629750194, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 
0.2990196110571132, + "dapo/num_sampling_attempts": 4.25, + "dapo/sampling_efficiency": 34.49404761904761, + "dapo/total_prompts_processed": 25.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.032, + "grad_norm": 0.02982812374830246, + "kl": 2.621859312057495e-05, + "learning_rate": 9.230669076497687e-07, + "loss": 0.0231, + "reward": 0.7687274925410748, + "reward_std": 0.9382865354418755, + "step": 28 + }, + { + "clip_fraction": 0.0, + "completion_length": 2772.941047668457, + "dapo/avg_reward_std": 0.2300749086972439, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.28282828854792047, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 48.482142857142854, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.03314285714285714, + "grad_norm": 0.030160676687955856, + "kl": 2.812594175338745e-05, + "learning_rate": 9.140576474687263e-07, + "loss": 0.0019, + "reward": 0.41888202354311943, + "reward_std": 0.9044449031352997, + "step": 29 + }, + { + "clip_fraction": 0.0, + "completion_length": 2038.208366394043, + "dapo/avg_reward_std": 0.1657373425437183, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.21544715943859843, + "dapo/num_sampling_attempts": 5.125, + "dapo/sampling_efficiency": 45.71969696969697, + "dapo/total_prompts_processed": 30.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.03428571428571429, + "grad_norm": 0.040263354778289795, + "kl": 3.8951635360717773e-05, + "learning_rate": 9.046048391230247e-07, + "loss": 0.0158, + "reward": 0.6328074131160975, + "reward_std": 0.913766622543335, + "step": 30 + }, + { + "clip_fraction": 0.0, + "completion_length": 2610.149299621582, + "dapo/avg_reward_std": 0.24689391613006592, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.39333333909511564, + "dapo/num_sampling_attempts": 3.125, + "dapo/sampling_efficiency": 50.74404761904762, + "dapo/total_prompts_processed": 18.75, + 
"dapo/valid_prompts_collected": 6.0, + "epoch": 0.03542857142857143, + "grad_norm": 0.03027450665831566, + "kl": 3.1307339668273926e-05, + "learning_rate": 8.9471999940354e-07, + "loss": 0.0264, + "reward": 0.6263847425580025, + "reward_std": 0.9919310808181763, + "step": 31 + }, + { + "clip_fraction": 0.0, + "completion_length": 2505.697952270508, + "dapo/avg_reward_std": 0.26817766793312564, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.34946237216072695, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 33.68055555555555, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.036571428571428574, + "grad_norm": 0.02961750328540802, + "kl": 2.7127563953399658e-05, + "learning_rate": 8.844151714648274e-07, + "loss": 0.0166, + "reward": 0.6057538501918316, + "reward_std": 0.9584499895572662, + "step": 32 + }, + { + "clip_fraction": 0.0, + "completion_length": 2879.420181274414, + "dapo/avg_reward_std": 0.24957223816050422, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2824074120985137, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 35.51136363636363, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.037714285714285714, + "grad_norm": 0.028292173519730568, + "kl": 2.950429916381836e-05, + "learning_rate": 8.737029101523929e-07, + "loss": 0.032, + "reward": 0.4974850555881858, + "reward_std": 0.9284666180610657, + "step": 33 + }, + { + "clip_fraction": 0.0, + "completion_length": 2605.826400756836, + "dapo/avg_reward_std": 0.27582160755991936, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.41666667101283866, + "dapo/num_sampling_attempts": 3.0, + "dapo/sampling_efficiency": 42.70833333333333, + "dapo/total_prompts_processed": 18.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.038857142857142854, + "grad_norm": 0.028110038489103317, + "kl": 3.172457218170166e-05, + "learning_rate": 
8.625962667065487e-07, + "loss": 0.0358, + "reward": 0.5906332535669208, + "reward_std": 0.8970795348286629, + "step": 34 + }, + { + "clip_fraction": 0.0, + "completion_length": 2197.09033203125, + "dapo/avg_reward_std": 0.2899627904097239, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3722222303350767, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 33.035714285714285, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.04, + "grad_norm": 0.03307325020432472, + "kl": 3.203749656677246e-05, + "learning_rate": 8.511087728614862e-07, + "loss": 0.024, + "reward": 0.6485824584960938, + "reward_std": 0.9721796959638596, + "step": 35 + }, + { + "clip_fraction": 0.0, + "completion_length": 2999.3507080078125, + "dapo/avg_reward_std": 0.20956570729613305, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.26250000707805154, + "dapo/num_sampling_attempts": 5.0, + "dapo/sampling_efficiency": 22.51488095238095, + "dapo/total_prompts_processed": 30.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.04114285714285714, + "grad_norm": 0.028769005089998245, + "kl": 3.2588839530944824e-05, + "learning_rate": 8.392544243589427e-07, + "loss": 0.0619, + "reward": 0.48274967167526484, + "reward_std": 0.8917501345276833, + "step": 36 + }, + { + "clip_fraction": 0.0, + "completion_length": 2790.3020935058594, + "dapo/avg_reward_std": 0.30638546783190507, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.42307692995438206, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 35.20833333333333, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.04228571428571429, + "grad_norm": 0.026894288137555122, + "kl": 3.5509467124938965e-05, + "learning_rate": 8.270476638965461e-07, + "loss": 0.0283, + "reward": 0.5098943561315536, + "reward_std": 0.9712026715278625, + "step": 37 + }, + { + "clip_fraction": 0.0, + 
"completion_length": 2677.1493530273438, + "dapo/avg_reward_std": 0.18201035128699408, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2481481538878547, + "dapo/num_sampling_attempts": 5.625, + "dapo/sampling_efficiency": 25.416666666666664, + "dapo/total_prompts_processed": 33.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.04342857142857143, + "grad_norm": 0.027049226686358452, + "kl": 2.641230821609497e-05, + "learning_rate": 8.145033635316128e-07, + "loss": 0.0457, + "reward": 0.507211847230792, + "reward_std": 0.9677048400044441, + "step": 38 + }, + { + "clip_fraction": 0.0, + "completion_length": 3130.437530517578, + "dapo/avg_reward_std": 0.2055508976473528, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3137254956014016, + "dapo/num_sampling_attempts": 4.25, + "dapo/sampling_efficiency": 26.160714285714278, + "dapo/total_prompts_processed": 25.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.044571428571428574, + "grad_norm": 0.027378324419260025, + "kl": 4.1447579860687256e-05, + "learning_rate": 8.01636806561836e-07, + "loss": 0.0522, + "reward": 0.5557294674217701, + "reward_std": 0.9394431114196777, + "step": 39 + }, + { + "clip_fraction": 0.0, + "completion_length": 2026.0486297607422, + "dapo/avg_reward_std": 0.20257248067193562, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2916666749450896, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 29.86111111111111, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.045714285714285714, + "grad_norm": 0.032405752688646317, + "kl": 1.9609928131103516e-05, + "learning_rate": 7.884636689049422e-07, + "loss": 0.0336, + "reward": 0.5694049745798111, + "reward_std": 0.9232507050037384, + "step": 40 + }, + { + "clip_fraction": 0.0, + "completion_length": 2640.326416015625, + "dapo/avg_reward_std": 0.21237638321789828, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 
0.34343435231483344, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 29.791666666666664, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.046857142857142854, + "grad_norm": 0.027951980009675026, + "kl": 2.6788562536239624e-05, + "learning_rate": 7.75e-07, + "loss": 0.0234, + "reward": 0.5206635389477015, + "reward_std": 0.9366661533713341, + "step": 41 + }, + { + "clip_fraction": 0.0, + "completion_length": 2681.18058013916, + "dapo/avg_reward_std": 0.24859387196343521, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3218390854268238, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 35.416666666666664, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.048, + "grad_norm": 0.03045503795146942, + "kl": 3.679096698760986e-05, + "learning_rate": 7.612622032536507e-07, + "loss": 0.0237, + "reward": 0.4700614605098963, + "reward_std": 0.9389084428548813, + "step": 42 + }, + { + "clip_fraction": 0.0, + "completion_length": 2398.7118072509766, + "dapo/avg_reward_std": 0.2748411413161985, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.322580651890847, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 29.999999999999996, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.04914285714285714, + "grad_norm": 0.02945004403591156, + "kl": 2.7336180210113525e-05, + "learning_rate": 7.472670160550848e-07, + "loss": -0.0567, + "reward": 0.6530590765178204, + "reward_std": 0.929742157459259, + "step": 43 + }, + { + "clip_fraction": 0.0, + "completion_length": 1968.3437805175781, + "dapo/avg_reward_std": 0.20995861871374977, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2685185232096248, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 42.410714285714285, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 
6.0, + "epoch": 0.05028571428571429, + "grad_norm": 0.0354490801692009, + "kl": 1.671910285949707e-05, + "learning_rate": 7.330314893841101e-07, + "loss": 0.0869, + "reward": 0.6298563629388809, + "reward_std": 0.9230287447571754, + "step": 44 + }, + { + "clip_fraction": 0.0, + "completion_length": 2218.2743225097656, + "dapo/avg_reward_std": 0.260509067773819, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.36666667262713115, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 33.229166666666664, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.05142857142857143, + "grad_norm": 0.02954520471394062, + "kl": 2.514384686946869e-05, + "learning_rate": 7.185729670371604e-07, + "loss": 0.0031, + "reward": 0.6325996220111847, + "reward_std": 0.9546400979161263, + "step": 45 + }, + { + "clip_fraction": 0.0, + "completion_length": 2081.1458587646484, + "dapo/avg_reward_std": 0.2187695243666249, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2849462402443732, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 37.22222222222222, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.052571428571428575, + "grad_norm": 0.033979643136262894, + "kl": 2.872943878173828e-05, + "learning_rate": 7.039090644965509e-07, + "loss": -0.0104, + "reward": 0.5167231820523739, + "reward_std": 0.9025325626134872, + "step": 46 + }, + { + "clip_fraction": 0.0, + "completion_length": 2117.541702270508, + "dapo/avg_reward_std": 0.18839570879936218, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.26811594580826553, + "dapo/num_sampling_attempts": 5.75, + "dapo/sampling_efficiency": 20.441919191919194, + "dapo/total_prompts_processed": 34.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.053714285714285714, + "grad_norm": 0.03177877888083458, + "kl": 3.078579902648926e-05, + "learning_rate": 6.890576474687263e-07, + "loss": 
0.0077, + "reward": 0.3684711689129472, + "reward_std": 0.8811993673443794, + "step": 47 + }, + { + "clip_fraction": 0.0, + "completion_length": 2177.4444885253906, + "dapo/avg_reward_std": 0.19605370469995448, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2702702763112816, + "dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 39.40972222222222, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.054857142857142854, + "grad_norm": 0.04067355766892433, + "kl": 2.4996697902679443e-05, + "learning_rate": 6.740368101176495e-07, + "loss": 0.0053, + "reward": 0.5635924749076366, + "reward_std": 0.9323460608720779, + "step": 48 + }, + { + "clip_fraction": 0.0, + "completion_length": 3022.513885498047, + "dapo/avg_reward_std": 0.22437315998655377, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.30808081003752624, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 51.880411255411246, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.056, + "grad_norm": 0.028243908658623695, + "kl": 3.2588839530944824e-05, + "learning_rate": 6.588648530198504e-07, + "loss": 0.0463, + "reward": 0.5983518976718187, + "reward_std": 0.97667645663023, + "step": 49 + }, + { + "clip_fraction": 0.0, + "completion_length": 2369.423614501953, + "dapo/avg_reward_std": 0.25065614397709185, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.36538461996958804, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 51.666666666666664, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.05714285714285714, + "grad_norm": 0.03361990302801132, + "kl": 2.4838373064994812e-05, + "learning_rate": 6.435602608679916e-07, + "loss": -0.0041, + "reward": 0.6849855165928602, + "reward_std": 0.9522178247570992, + "step": 50 + }, + { + "clip_fraction": 0.0, + "completion_length": 2274.833396911621, + 
"dapo/avg_reward_std": 0.22345838612980312, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2916666745311684, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 27.132936507936506, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.05828571428571429, + "grad_norm": 0.031927697360515594, + "kl": 1.7890706658363342e-05, + "learning_rate": 6.281416799501187e-07, + "loss": 0.0196, + "reward": 0.8541890066117048, + "reward_std": 0.9146186113357544, + "step": 51 + }, + { + "clip_fraction": 0.0, + "completion_length": 2918.0799102783203, + "dapo/avg_reward_std": 0.28684074508732765, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3333333386429425, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 41.36904761904762, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.05942857142857143, + "grad_norm": 0.026396779343485832, + "kl": 2.3087020963430405e-05, + "learning_rate": 6.126278954320294e-07, + "loss": 0.0343, + "reward": 0.44786757230758667, + "reward_std": 0.9706326127052307, + "step": 52 + }, + { + "clip_fraction": 0.0, + "completion_length": 2045.833339691162, + "dapo/avg_reward_std": 0.2355064716604021, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2870370431078805, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 31.354166666666664, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.060571428571428575, + "grad_norm": 0.04913632944226265, + "kl": 2.1755695343017578e-05, + "learning_rate": 5.97037808470444e-07, + "loss": 0.0387, + "reward": 0.6510349959135056, + "reward_std": 0.9507962614297867, + "step": 53 + }, + { + "clip_fraction": 0.0, + "completion_length": 1948.9444427490234, + "dapo/avg_reward_std": 0.243668794631958, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.366666671037674, + "dapo/num_sampling_attempts": 
3.125, + "dapo/sampling_efficiency": 56.5625, + "dapo/total_prompts_processed": 18.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.061714285714285715, + "grad_norm": 0.040572620928287506, + "kl": 2.1360814571380615e-05, + "learning_rate": 5.813904131848564e-07, + "loss": 0.0417, + "reward": 0.5514028863981366, + "reward_std": 0.9589040726423264, + "step": 54 + }, + { + "clip_fraction": 0.0, + "completion_length": 2484.541648864746, + "dapo/avg_reward_std": 0.30484401606596434, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.42307693224686843, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 42.18749999999999, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.06285714285714286, + "grad_norm": 0.0297782514244318, + "kl": 2.2893771529197693e-05, + "learning_rate": 5.657047735161255e-07, + "loss": -0.0009, + "reward": 0.4546010522171855, + "reward_std": 0.9696914628148079, + "step": 55 + }, + { + "clip_fraction": 0.0, + "completion_length": 1533.7361297607422, + "dapo/avg_reward_std": 0.2159253837484302, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.29797980415098596, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 34.722222222222214, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.064, + "grad_norm": 0.03312206640839577, + "kl": 7.178634405136108e-06, + "learning_rate": 5.5e-07, + "loss": 0.0108, + "reward": 0.7257717102766037, + "reward_std": 0.9033158496022224, + "step": 56 + }, + { + "clip_fraction": 0.0, + "completion_length": 2934.4409942626953, + "dapo/avg_reward_std": 0.2505974847337474, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.36956522192644037, + "dapo/num_sampling_attempts": 2.875, + "dapo/sampling_efficiency": 41.66666666666666, + "dapo/total_prompts_processed": 17.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.06514285714285714, + "grad_norm": 
0.02451159618794918, + "kl": 1.9356608390808105e-05, + "learning_rate": 5.342952264838747e-07, + "loss": 0.0483, + "reward": 0.5572653282433748, + "reward_std": 0.9176028743386269, + "step": 57 + }, + { + "clip_fraction": 0.0, + "completion_length": 1933.5243377685547, + "dapo/avg_reward_std": 0.20699472725391388, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3235294174622087, + "dapo/num_sampling_attempts": 4.25, + "dapo/sampling_efficiency": 43.50198412698413, + "dapo/total_prompts_processed": 25.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.06628571428571428, + "grad_norm": 0.04205997660756111, + "kl": 2.446398138999939e-05, + "learning_rate": 5.186095868151436e-07, + "loss": 0.035, + "reward": 0.5425214860588312, + "reward_std": 0.9688811302185059, + "step": 58 + }, + { + "clip_fraction": 0.0, + "completion_length": 2404.819435119629, + "dapo/avg_reward_std": 0.21416518474236512, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2649572701790394, + "dapo/num_sampling_attempts": 4.875, + "dapo/sampling_efficiency": 28.070436507936506, + "dapo/total_prompts_processed": 29.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.06742857142857143, + "grad_norm": 0.032379262149333954, + "kl": 2.0030885934829712e-05, + "learning_rate": 5.02962191529556e-07, + "loss": -0.0022, + "reward": 0.5781768467277288, + "reward_std": 0.9525356665253639, + "step": 59 + }, + { + "clip_fraction": 0.0, + "completion_length": 2963.888931274414, + "dapo/avg_reward_std": 0.32426256509054274, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.42857143637679873, + "dapo/num_sampling_attempts": 2.625, + "dapo/sampling_efficiency": 58.035714285714285, + "dapo/total_prompts_processed": 15.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.06857142857142857, + "grad_norm": 0.027211569249629974, + "kl": 1.7156358808279037e-05, + "learning_rate": 4.873721045679706e-07, + "loss": 0.0068, + "reward": 0.44747511111199856, + 
"reward_std": 0.9607158154249191, + "step": 60 + }, + { + "clip_fraction": 0.0, + "completion_length": 2205.2465591430664, + "dapo/avg_reward_std": 0.203433408588171, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2500000063329935, + "dapo/num_sampling_attempts": 5.0, + "dapo/sampling_efficiency": 38.46153846153846, + "dapo/total_prompts_processed": 30.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.06971428571428571, + "grad_norm": 0.035166963934898376, + "kl": 1.146271824836731e-05, + "learning_rate": 4.7185832004988133e-07, + "loss": 0.0016, + "reward": 0.7233948148787022, + "reward_std": 0.9537224471569061, + "step": 61 + }, + { + "clip_fraction": 0.0, + "completion_length": 2170.302101135254, + "dapo/avg_reward_std": 0.3071755821054632, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.46212121776559134, + "dapo/num_sampling_attempts": 2.75, + "dapo/sampling_efficiency": 52.5, + "dapo/total_prompts_processed": 16.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.07085714285714285, + "grad_norm": 0.032445963472127914, + "kl": 1.7118407413363457e-05, + "learning_rate": 4.5643973913200837e-07, + "loss": 0.0133, + "reward": 0.5614959334488958, + "reward_std": 0.9226407110691071, + "step": 62 + }, + { + "clip_fraction": 0.0, + "completion_length": 2304.038215637207, + "dapo/avg_reward_std": 0.3201758420025861, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3827160596847534, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 33.33333333333333, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.072, + "grad_norm": 0.03544686362147331, + "kl": 1.1014439223799855e-05, + "learning_rate": 4.4113514698014953e-07, + "loss": 0.0809, + "reward": 0.6520206034183502, + "reward_std": 0.9506091177463531, + "step": 63 + }, + { + "clip_fraction": 0.0, + "completion_length": 1901.3506965637207, + "dapo/avg_reward_std": 0.2710137654233862, + 
"dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33950617964620944, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 38.541666666666664, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.07314285714285715, + "grad_norm": 0.044119708240032196, + "kl": 2.606213092803955e-05, + "learning_rate": 4.2596318988235037e-07, + "loss": 0.0059, + "reward": 0.6546321045607328, + "reward_std": 0.9510733336210251, + "step": 64 + }, + { + "clip_fraction": 0.0, + "completion_length": 2792.0382232666016, + "dapo/avg_reward_std": 0.2836403740303857, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.36904762951391085, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 39.58333333333333, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.07428571428571429, + "grad_norm": 0.04388947784900665, + "kl": 1.2818491086363792e-05, + "learning_rate": 4.1094235253127374e-07, + "loss": 0.0675, + "reward": 0.5376700833439827, + "reward_std": 0.9546815231442451, + "step": 65 + }, + { + "clip_fraction": 0.0, + "completion_length": 3018.1111450195312, + "dapo/avg_reward_std": 0.2566617141167323, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.35000000993410746, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 29.583333333333325, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.07542857142857143, + "grad_norm": 0.030510403215885162, + "kl": 2.337433397769928e-05, + "learning_rate": 3.9609093550344907e-07, + "loss": 0.067, + "reward": 0.45654861629009247, + "reward_std": 0.9348908290266991, + "step": 66 + }, + { + "clip_fraction": 0.0, + "completion_length": 2246.7361183166504, + "dapo/avg_reward_std": 0.17681238457963272, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2657657684506597, + "dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 
39.75198412698412, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.07657142857142857, + "grad_norm": 0.039485227316617966, + "kl": 3.0115246772766113e-05, + "learning_rate": 3.8142703296283953e-07, + "loss": -0.0103, + "reward": 0.559457328170538, + "reward_std": 0.9844456240534782, + "step": 67 + }, + { + "clip_fraction": 0.0, + "completion_length": 1877.3090591430664, + "dapo/avg_reward_std": 0.21082516993795122, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2809523867709296, + "dapo/num_sampling_attempts": 4.375, + "dapo/sampling_efficiency": 40.13888888888889, + "dapo/total_prompts_processed": 26.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.07771428571428571, + "grad_norm": 0.04208315163850784, + "kl": 1.7916783690452576e-05, + "learning_rate": 3.6696851061588994e-07, + "loss": 0.0055, + "reward": 0.71805115416646, + "reward_std": 0.9486410617828369, + "step": 68 + }, + { + "clip_fraction": 0.0, + "completion_length": 2743.187484741211, + "dapo/avg_reward_std": 0.3629622704842511, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.5882353020064971, + "dapo/num_sampling_attempts": 2.125, + "dapo/sampling_efficiency": 57.291666666666664, + "dapo/total_prompts_processed": 12.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.07885714285714286, + "grad_norm": 0.046305615454912186, + "kl": 1.8481165170669556e-05, + "learning_rate": 3.5273298394491515e-07, + "loss": 0.0753, + "reward": 0.5533816255629063, + "reward_std": 0.9835677221417427, + "step": 69 + }, + { + "clip_fraction": 0.0, + "completion_length": 1971.8750610351562, + "dapo/avg_reward_std": 0.290031298995018, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3958333432674408, + "dapo/num_sampling_attempts": 3.0, + "dapo/sampling_efficiency": 50.11904761904761, + "dapo/total_prompts_processed": 18.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.08, + "grad_norm": 0.03249451890587807, + 
"kl": 1.0361894965171814e-05, + "learning_rate": 3.387377967463493e-07, + "loss": 0.0123, + "reward": 0.7815902195870876, + "reward_std": 0.9491127580404282, + "step": 70 + }, + { + "clip_fraction": 0.0, + "completion_length": 2149.5729370117188, + "dapo/avg_reward_std": 0.30720199798715525, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.37931035356274967, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 31.666666666666664, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.08114285714285714, + "grad_norm": 0.02995998226106167, + "kl": 2.8252601623535156e-05, + "learning_rate": 3.250000000000001e-07, + "loss": 0.0769, + "reward": 0.5328625496476889, + "reward_std": 0.9026356488466263, + "step": 71 + }, + { + "clip_fraction": 0.0, + "completion_length": 1963.1562538146973, + "dapo/avg_reward_std": 0.27671699684399825, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4551282163995963, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 46.041666666666664, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.08228571428571428, + "grad_norm": 0.046918418258428574, + "kl": 3.359094262123108e-05, + "learning_rate": 3.115363310950578e-07, + "loss": 0.0368, + "reward": 0.32596728252246976, + "reward_std": 0.917833186686039, + "step": 72 + }, + { + "clip_fraction": 0.0, + "completion_length": 2666.1666717529297, + "dapo/avg_reward_std": 0.2536189202219248, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.34895834140479565, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 37.84722222222222, + "dapo/total_prompts_processed": 24.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.08342857142857144, + "grad_norm": 0.0253219623118639, + "kl": 3.542192280292511e-05, + "learning_rate": 2.9836319343816397e-07, + "loss": 0.0107, + "reward": 0.6293175183236599, + "reward_std": 0.935965321958065, 
+ "step": 73 + }, + { + "clip_fraction": 0.0, + "completion_length": 2119.447982788086, + "dapo/avg_reward_std": 0.26048696994781495, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4200000029802322, + "dapo/num_sampling_attempts": 3.125, + "dapo/sampling_efficiency": 47.291666666666664, + "dapo/total_prompts_processed": 18.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.08457142857142858, + "grad_norm": 0.034480538219213486, + "kl": 1.7508864402770996e-05, + "learning_rate": 2.854966364683872e-07, + "loss": 0.0483, + "reward": 0.7494360618293285, + "reward_std": 0.9492424502968788, + "step": 74 + }, + { + "clip_fraction": 0.0, + "completion_length": 2078.9375, + "dapo/avg_reward_std": 0.2828026126932215, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3580246976128331, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 35.11904761904762, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.08571428571428572, + "grad_norm": 0.03545458987355232, + "kl": 1.3923272490501404e-05, + "learning_rate": 2.729523361034538e-07, + "loss": 0.0531, + "reward": 0.5464182365685701, + "reward_std": 0.9530047550797462, + "step": 75 + }, + { + "clip_fraction": 0.0, + "completion_length": 2342.5416564941406, + "dapo/avg_reward_std": 0.21854268149896103, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3080808154561303, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 32.341269841269835, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.08685714285714285, + "grad_norm": 0.02881987765431404, + "kl": 1.169554889202118e-05, + "learning_rate": 2.6074557564105724e-07, + "loss": 0.0077, + "reward": 0.5642017107456923, + "reward_std": 0.9335212334990501, + "step": 76 + }, + { + "clip_fraction": 0.0, + "completion_length": 3205.104217529297, + "dapo/avg_reward_std": 0.2153491945493789, + "dapo/filter_reward_index": 
0.0, + "dapo/kept_prompts_ratio": 0.2777777835726738, + "dapo/num_sampling_attempts": 5.25, + "dapo/sampling_efficiency": 23.45238095238095, + "dapo/total_prompts_processed": 31.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.088, + "grad_norm": 0.024909108877182007, + "kl": 2.2567808628082275e-05, + "learning_rate": 2.488912271385139e-07, + "loss": 0.0436, + "reward": 0.4511043671518564, + "reward_std": 0.9582105726003647, + "step": 77 + }, + { + "clip_fraction": 0.0, + "completion_length": 1984.7881927490234, + "dapo/avg_reward_std": 0.2325562967194451, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3703703780968984, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 46.354166666666664, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.08914285714285715, + "grad_norm": 0.04120900481939316, + "kl": 2.2590160369873047e-05, + "learning_rate": 2.374037332934512e-07, + "loss": 0.0514, + "reward": 0.46765367314219475, + "reward_std": 0.9171552434563637, + "step": 78 + }, + { + "clip_fraction": 0.0, + "completion_length": 2322.930576324463, + "dapo/avg_reward_std": 0.24565138667821884, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.35416666977107525, + "dapo/num_sampling_attempts": 3.0, + "dapo/sampling_efficiency": 49.375, + "dapo/total_prompts_processed": 18.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.09028571428571429, + "grad_norm": 0.03351881355047226, + "kl": 1.6979873180389404e-05, + "learning_rate": 2.2629708984760706e-07, + "loss": 0.0813, + "reward": 0.4460947550833225, + "reward_std": 0.9485716819763184, + "step": 79 + }, + { + "clip_fraction": 0.0, + "completion_length": 2418.187545776367, + "dapo/avg_reward_std": 0.23119631229024945, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2929292975953131, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 37.013888888888886, + "dapo/total_prompts_processed": 
24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.09142857142857143, + "grad_norm": 0.03444164991378784, + "kl": 1.9297003746032715e-05, + "learning_rate": 2.1558482853517253e-07, + "loss": -0.0123, + "reward": 0.47735430393368006, + "reward_std": 0.9275016784667969, + "step": 80 + }, + { + "clip_fraction": 0.0, + "completion_length": 2673.1666870117188, + "dapo/avg_reward_std": 0.29530651973826544, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.39285714977553915, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 40.52083333333333, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.09257142857142857, + "grad_norm": 0.02858138270676136, + "kl": 1.998385414481163e-05, + "learning_rate": 2.0528000059645995e-07, + "loss": 0.034, + "reward": 0.41152474470436573, + "reward_std": 0.9514285027980804, + "step": 81 + }, + { + "clip_fraction": 0.0, + "completion_length": 2257.954864501953, + "dapo/avg_reward_std": 0.23162428935368856, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3277777835726738, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 39.72222222222222, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.09371428571428571, + "grad_norm": 0.034180980175733566, + "kl": 1.03069469332695e-05, + "learning_rate": 7.681643291108517e-07, + "loss": 0.0478, + "reward": 0.6525773257017136, + "reward_std": 0.9826234132051468, + "step": 82 + }, + { + "clip_fraction": 0.0, + "completion_length": 2630.8507080078125, + "dapo/avg_reward_std": 0.25974711243595394, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3511904797383717, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 49.166666666666664, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.09485714285714286, + "grad_norm": 0.03644736111164093, + "kl": 1.800060272216797e-05, + 
"learning_rate": 7.612622032536507e-07, + "loss": 0.0921, + "reward": 0.4112757742404938, + "reward_std": 0.9365755990147591, + "step": 83 + }, + { + "clip_fraction": 0.0, + "completion_length": 2569.4896087646484, + "dapo/avg_reward_std": 0.20397330891518367, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.22619048080274037, + "dapo/num_sampling_attempts": 5.25, + "dapo/sampling_efficiency": 33.541666666666664, + "dapo/total_prompts_processed": 31.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.096, + "grad_norm": 0.027630111202597618, + "kl": 9.745359420776367e-06, + "learning_rate": 7.54295724882796e-07, + "loss": 0.0357, + "reward": 0.41497555933892727, + "reward_std": 0.9506618455052376, + "step": 84 + }, + { + "clip_fraction": 0.0, + "completion_length": 2213.0660400390625, + "dapo/avg_reward_std": 0.2754218357224618, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33333334038334506, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 36.354166666666664, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.09714285714285714, + "grad_norm": 0.035216327756643295, + "kl": 1.6536563634872437e-05, + "learning_rate": 7.472670160550848e-07, + "loss": 0.0527, + "reward": 0.632079154253006, + "reward_std": 0.9386599361896515, + "step": 85 + }, + { + "clip_fraction": 0.0, + "completion_length": 2339.1215209960938, + "dapo/avg_reward_std": 0.24339192857344946, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.291666673289405, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 35.3125, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.09828571428571428, + "grad_norm": 0.03125083073973656, + "kl": 1.6085803508758545e-05, + "learning_rate": 7.401782177833147e-07, + "loss": -0.0221, + "reward": 0.4631906310096383, + "reward_std": 0.9198382347822189, + "step": 86 + }, + { + "clip_fraction": 0.0, + 
"completion_length": 1837.8993301391602, + "dapo/avg_reward_std": 0.22774873872598012, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3777777845660845, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 46.87499999999999, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.09942857142857142, + "grad_norm": 0.04138842225074768, + "kl": 1.7467886209487915e-05, + "learning_rate": 7.330314893841101e-07, + "loss": 0.0024, + "reward": 0.7271542213857174, + "reward_std": 0.905590832233429, + "step": 87 + }, + { + "clip_fraction": 0.0, + "completion_length": 2786.0416564941406, + "dapo/avg_reward_std": 0.2095056755202157, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2952381019081388, + "dapo/num_sampling_attempts": 4.375, + "dapo/sampling_efficiency": 35.65972222222222, + "dapo/total_prompts_processed": 26.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.10057142857142858, + "grad_norm": 0.025848887860774994, + "kl": 7.427297532558441e-06, + "learning_rate": 7.258290078201731e-07, + "loss": 0.002, + "reward": 0.43730420619249344, + "reward_std": 0.9195110127329826, + "step": 88 + }, + { + "clip_fraction": 0.0, + "completion_length": 2346.68754196167, + "dapo/avg_reward_std": 0.19395678072440914, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2560975657003682, + "dapo/num_sampling_attempts": 5.125, + "dapo/sampling_efficiency": 35.01488095238095, + "dapo/total_prompts_processed": 30.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.10171428571428572, + "grad_norm": 0.040970027446746826, + "kl": 1.3796612620353699e-05, + "learning_rate": 7.185729670371604e-07, + "loss": 0.0476, + "reward": 0.6351554682478309, + "reward_std": 0.8568265736103058, + "step": 89 + }, + { + "clip_fraction": 0.0, + "completion_length": 2486.21875, + "dapo/avg_reward_std": 0.2474305311153675, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 
0.3735632284961898, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 37.61904761904762, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.10285714285714286, + "grad_norm": 0.030587567016482353, + "kl": 1.4983117580413818e-05, + "learning_rate": 7.11265577295385e-07, + "loss": 0.0254, + "reward": 0.6515812119469047, + "reward_std": 0.9235646799206734, + "step": 90 + }, + { + "clip_fraction": 0.0, + "completion_length": 2515.017402648926, + "dapo/avg_reward_std": 0.25874078144197876, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3913043562484824, + "dapo/num_sampling_attempts": 2.875, + "dapo/sampling_efficiency": 51.56249999999999, + "dapo/total_prompts_processed": 17.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.104, + "grad_norm": 0.031289275735616684, + "kl": 6.1551108956336975e-06, + "learning_rate": 7.039090644965509e-07, + "loss": 0.0328, + "reward": 0.6403396036475897, + "reward_std": 0.9428967460989952, + "step": 91 + }, + { + "clip_fraction": 0.0, + "completion_length": 2979.027801513672, + "dapo/avg_reward_std": 0.2504267347486396, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2543859713171658, + "dapo/num_sampling_attempts": 4.75, + "dapo/sampling_efficiency": 35.63041125541125, + "dapo/total_prompts_processed": 28.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.10514285714285715, + "grad_norm": 0.029049718752503395, + "kl": -1.2740492820739746e-06, + "learning_rate": 6.965056695057204e-07, + "loss": 0.0314, + "reward": 0.535519327968359, + "reward_std": 0.8926167041063309, + "step": 92 + }, + { + "clip_fraction": 0.0, + "completion_length": 2552.562515258789, + "dapo/avg_reward_std": 0.2413217886801689, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33333334038334506, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 34.791666666666664, + "dapo/total_prompts_processed": 23.25, + 
"dapo/valid_prompts_collected": 6.0, + "epoch": 0.10628571428571429, + "grad_norm": 0.03139115869998932, + "kl": 1.3202428817749023e-05, + "learning_rate": 6.890576474687263e-07, + "loss": 0.067, + "reward": 0.6561751328408718, + "reward_std": 0.9787176623940468, + "step": 93 + }, + { + "clip_fraction": 0.0, + "completion_length": 2403.184051513672, + "dapo/avg_reward_std": 0.29813223962600416, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.40384616129673445, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 40.416666666666664, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.10742857142857143, + "grad_norm": 0.032709378749132156, + "kl": 2.093333750963211e-05, + "learning_rate": 6.815672671252315e-07, + "loss": 0.0328, + "reward": 0.556912356056273, + "reward_std": 0.9464646279811859, + "step": 94 + }, + { + "clip_fraction": 0.0, + "completion_length": 2963.795181274414, + "dapo/avg_reward_std": 0.2564438986472594, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.26068376577817476, + "dapo/num_sampling_attempts": 4.875, + "dapo/sampling_efficiency": 24.07738095238095, + "dapo/total_prompts_processed": 29.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.10857142857142857, + "grad_norm": 0.023549171164631844, + "kl": 9.554903954267502e-06, + "learning_rate": 6.740368101176495e-07, + "loss": 0.0142, + "reward": 0.3492610058747232, + "reward_std": 0.8781530037522316, + "step": 95 + }, + { + "clip_fraction": 0.0, + "completion_length": 2655.21875, + "dapo/avg_reward_std": 0.31138683449138294, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.46969697827642615, + "dapo/num_sampling_attempts": 2.75, + "dapo/sampling_efficiency": 43.74999999999999, + "dapo/total_prompts_processed": 16.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.10971428571428571, + "grad_norm": 0.03213554993271828, + "kl": 1.945020630955696e-05, + "learning_rate": 
6.664685702961344e-07, + "loss": 0.0357, + "reward": 0.4872458651661873, + "reward_std": 0.9538498669862747, + "step": 96 + }, + { + "clip_fraction": 0.0, + "completion_length": 2325.888900756836, + "dapo/avg_reward_std": 0.18781672976911068, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2968750069849193, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 38.263888888888886, + "dapo/total_prompts_processed": 24.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.11085714285714286, + "grad_norm": 0.03308973088860512, + "kl": 1.2524658814072609e-05, + "learning_rate": 6.588648530198504e-07, + "loss": 0.0332, + "reward": 0.5582090672105551, + "reward_std": 0.9704806208610535, + "step": 97 + }, + { + "clip_fraction": 0.0, + "completion_length": 2980.78125, + "dapo/avg_reward_std": 0.22120360245830134, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.29824561900214147, + "dapo/num_sampling_attempts": 4.75, + "dapo/sampling_efficiency": 34.717261904761905, + "dapo/total_prompts_processed": 28.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.112, + "grad_norm": 0.02593560516834259, + "kl": 9.87970270216465e-06, + "learning_rate": 6.512279744547392e-07, + "loss": 0.0537, + "reward": 0.5110117536969483, + "reward_std": 0.9140844419598579, + "step": 98 + }, + { + "clip_fraction": 0.0, + "completion_length": 2679.701400756836, + "dapo/avg_reward_std": 0.22513854503631592, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.388888892200258, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 40.104166666666664, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.11314285714285714, + "grad_norm": 0.028198201209306717, + "kl": -2.773245796561241e-06, + "learning_rate": 6.435602608679916e-07, + "loss": 0.0223, + "reward": 0.5703150723129511, + "reward_std": 0.9169064536690712, + "step": 99 + }, + { + "clip_fraction": 0.0, + "completion_length": 
2113.7396087646484, + "dapo/avg_reward_std": 0.2158526074555185, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2916666724615627, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 30.823863636363633, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.11428571428571428, + "grad_norm": 0.032321903854608536, + "kl": 2.765655517578125e-05, + "learning_rate": 6.358640479194451e-07, + "loss": 0.037, + "reward": 0.552736995741725, + "reward_std": 0.929665133357048, + "step": 100 + }, + { + "clip_fraction": 0.0, + "completion_length": 2397.545135498047, + "dapo/avg_reward_std": 0.2640196681022644, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.41304348603538843, + "dapo/num_sampling_attempts": 2.875, + "dapo/sampling_efficiency": 43.75, + "dapo/total_prompts_processed": 17.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.11542857142857142, + "grad_norm": 0.030507881194353104, + "kl": 1.4653429388999939e-05, + "learning_rate": 6.281416799501187e-07, + "loss": 0.0216, + "reward": 0.7607237044721842, + "reward_std": 0.9413916915655136, + "step": 101 + }, + { + "clip_fraction": 0.0, + "completion_length": 2775.312515258789, + "dapo/avg_reward_std": 0.26319959415839267, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3910256509597485, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 38.95833333333333, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.11657142857142858, + "grad_norm": 0.028825754299759865, + "kl": 1.7821788787841797e-05, + "learning_rate": 6.203955092681039e-07, + "loss": -0.0059, + "reward": 0.4367541056126356, + "reward_std": 0.9408165961503983, + "step": 102 + }, + { + "clip_fraction": 0.0, + "completion_length": 2606.3194580078125, + "dapo/avg_reward_std": 0.22601407093386497, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.295698931620967, + 
"dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 30.624999999999993, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.11771428571428572, + "grad_norm": 0.029979709535837173, + "kl": 2.3851171135902405e-06, + "learning_rate": 6.126278954320294e-07, + "loss": 0.0463, + "reward": 0.6886496935039759, + "reward_std": 0.9053627252578735, + "step": 103 + }, + { + "clip_fraction": 0.0, + "completion_length": 2084.829849243164, + "dapo/avg_reward_std": 0.22010741523794225, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2702702747003452, + "dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 32.51488095238095, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.11885714285714286, + "grad_norm": 0.04769710823893547, + "kl": 2.0613893866539e-05, + "learning_rate": 6.048412045323164e-07, + "loss": 0.1162, + "reward": 0.684872523881495, + "reward_std": 0.9595381543040276, + "step": 104 + }, + { + "clip_fraction": 0.0, + "completion_length": 1955.1354484558105, + "dapo/avg_reward_std": 0.2937169720729192, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.42361111504336196, + "dapo/num_sampling_attempts": 3.0, + "dapo/sampling_efficiency": 49.166666666666664, + "dapo/total_prompts_processed": 18.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.12, + "grad_norm": 0.04352044314146042, + "kl": 2.0936131477355957e-05, + "learning_rate": 5.97037808470444e-07, + "loss": -0.0017, + "reward": 0.6524754576385021, + "reward_std": 0.9669848829507828, + "step": 105 + }, + { + "clip_fraction": 0.0, + "completion_length": 2316.0486221313477, + "dapo/avg_reward_std": 0.2529407059773803, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3020833423361182, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 30.729166666666664, + "dapo/total_prompts_processed": 24.0, + "dapo/valid_prompts_collected": 6.0, + 
"epoch": 0.12114285714285715, + "grad_norm": 0.03129468858242035, + "kl": 1.8656253814697266e-05, + "learning_rate": 5.892200842364462e-07, + "loss": -0.0284, + "reward": 0.6108895651996136, + "reward_std": 0.9319325312972069, + "step": 106 + }, + { + "clip_fraction": 0.0, + "completion_length": 2094.6909942626953, + "dapo/avg_reward_std": 0.2037892586655087, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2629629688130485, + "dapo/num_sampling_attempts": 5.625, + "dapo/sampling_efficiency": 21.066919191919194, + "dapo/total_prompts_processed": 33.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.12228571428571429, + "grad_norm": 0.038948290050029755, + "kl": 2.824072726070881e-05, + "learning_rate": 5.813904131848564e-07, + "loss": 0.0748, + "reward": 0.48047966323792934, + "reward_std": 0.9251860752701759, + "step": 107 + }, + { + "clip_fraction": 0.0, + "completion_length": 2482.6146240234375, + "dapo/avg_reward_std": 0.19606016278266908, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.22592593100335862, + "dapo/num_sampling_attempts": 5.625, + "dapo/sampling_efficiency": 21.577380952380953, + "dapo/total_prompts_processed": 33.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.12342857142857143, + "grad_norm": 0.027610260993242264, + "kl": 1.3685785233974457e-05, + "learning_rate": 5.735511803093248e-07, + "loss": 0.0016, + "reward": 0.46788009256124496, + "reward_std": 0.9522990807890892, + "step": 108 + }, + { + "clip_fraction": 0.0, + "completion_length": 3010.541717529297, + "dapo/avg_reward_std": 0.23601235449314117, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.38461538977347887, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 61.5530303030303, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.12457142857142857, + "grad_norm": 0.031469572335481644, + "kl": 2.0675361156463623e-05, + "learning_rate": 5.657047735161255e-07, + 
"loss": 0.0491, + "reward": 0.6003496535122395, + "reward_std": 0.9582010880112648, + "step": 109 + }, + { + "clip_fraction": 0.0, + "completion_length": 2550.388931274414, + "dapo/avg_reward_std": 0.24275302588939668, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3222222273548444, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 41.666666666666664, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.12571428571428572, + "grad_norm": 0.03043791465461254, + "kl": 1.619383692741394e-05, + "learning_rate": 5.578535828967777e-07, + "loss": 0.0395, + "reward": 0.6210233392193913, + "reward_std": 0.9545274153351784, + "step": 110 + }, + { + "clip_fraction": 0.0, + "completion_length": 2248.6771240234375, + "dapo/avg_reward_std": 0.2556017003953457, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.32291667349636555, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 40.451388888888886, + "dapo/total_prompts_processed": 24.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.12685714285714286, + "grad_norm": 0.029558613896369934, + "kl": 1.7130747437477112e-05, + "learning_rate": 5.5e-07, + "loss": 0.0156, + "reward": 0.8898655958473682, + "reward_std": 0.8961458280682564, + "step": 111 + }, + { + "clip_fraction": 0.0, + "completion_length": 2790.4132537841797, + "dapo/avg_reward_std": 0.2798377914088113, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.35714286299688475, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 32.291666666666664, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.128, + "grad_norm": 0.02665926143527031, + "kl": 2.7702553779818118e-05, + "learning_rate": 5.421464171032224e-07, + "loss": 0.0375, + "reward": 0.4765107296407223, + "reward_std": 0.9586756750941277, + "step": 112 + }, + { + "clip_fraction": 0.0, + "completion_length": 2058.163261413574, + 
"dapo/avg_reward_std": 0.21719616024117722, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2850877270102501, + "dapo/num_sampling_attempts": 4.75, + "dapo/sampling_efficiency": 36.13636363636364, + "dapo/total_prompts_processed": 28.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.12914285714285714, + "grad_norm": 0.03724399581551552, + "kl": 9.129568934440613e-05, + "learning_rate": 5.342952264838747e-07, + "loss": 0.0308, + "reward": 0.5965504869818687, + "reward_std": 0.9517285376787186, + "step": 113 + }, + { + "clip_fraction": 0.0, + "completion_length": 1804.7569427490234, + "dapo/avg_reward_std": 0.22654692203767837, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.30645161819073463, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 46.800595238095234, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.13028571428571428, + "grad_norm": 0.0444670133292675, + "kl": 3.589317202568054e-05, + "learning_rate": 5.264488196906752e-07, + "loss": 0.0217, + "reward": 0.4887783471494913, + "reward_std": 0.9572358801960945, + "step": 114 + }, + { + "clip_fraction": 0.0, + "completion_length": 2705.472236633301, + "dapo/avg_reward_std": 0.24942583271435328, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4285714335384823, + "dapo/num_sampling_attempts": 2.625, + "dapo/sampling_efficiency": 51.45833333333333, + "dapo/total_prompts_processed": 15.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.13142857142857142, + "grad_norm": 0.027661452069878578, + "kl": 1.307763159275055e-05, + "learning_rate": 5.186095868151436e-07, + "loss": -0.022, + "reward": 0.5754544343799353, + "reward_std": 0.9811793565750122, + "step": 115 + }, + { + "clip_fraction": 0.0, + "completion_length": 1660.2222213745117, + "dapo/avg_reward_std": 0.20845345951415398, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.30630631100487066, + 
"dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 32.013888888888886, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.13257142857142856, + "grad_norm": 0.03922427445650101, + "kl": 7.28946179151535e-06, + "learning_rate": 5.107799157635538e-07, + "loss": 0.0279, + "reward": 0.8034113459289074, + "reward_std": 0.9163173362612724, + "step": 116 + }, + { + "clip_fraction": 0.0, + "completion_length": 2143.3368377685547, + "dapo/avg_reward_std": 0.25861393963849105, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3456790193363472, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 38.95833333333333, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.1337142857142857, + "grad_norm": 0.0386907123029232, + "kl": 2.8124195523560047e-05, + "learning_rate": 5.02962191529556e-07, + "loss": 0.0157, + "reward": 0.5698221866041422, + "reward_std": 0.9738077968358994, + "step": 117 + }, + { + "clip_fraction": 0.0, + "completion_length": 2709.371551513672, + "dapo/avg_reward_std": 0.17381487890731456, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.26356589343658715, + "dapo/num_sampling_attempts": 5.375, + "dapo/sampling_efficiency": 31.522817460317455, + "dapo/total_prompts_processed": 32.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.13485714285714287, + "grad_norm": 0.03524978086352348, + "kl": 2.0368024706840515e-05, + "learning_rate": 4.951587954676837e-07, + "loss": 0.073, + "reward": 0.5433152373880148, + "reward_std": 0.9576972275972366, + "step": 118 + }, + { + "clip_fraction": 0.0, + "completion_length": 2729.6458129882812, + "dapo/avg_reward_std": 0.2853468172252178, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.31770834140479565, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 38.13988095238095, + "dapo/total_prompts_processed": 24.0, + 
"dapo/valid_prompts_collected": 6.0, + "epoch": 0.136, + "grad_norm": 0.035877469927072525, + "kl": 9.79006290435791e-06, + "learning_rate": 4.873721045679706e-07, + "loss": 0.0223, + "reward": 0.4996686838567257, + "reward_std": 0.9503490626811981, + "step": 119 + }, + { + "clip_fraction": 0.0, + "completion_length": 2456.458351135254, + "dapo/avg_reward_std": 0.3290893492244539, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.5000000127724239, + "dapo/num_sampling_attempts": 2.625, + "dapo/sampling_efficiency": 40.62499999999999, + "dapo/total_prompts_processed": 15.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.13714285714285715, + "grad_norm": 0.03583266958594322, + "kl": 9.331852197647095e-06, + "learning_rate": 4.79604490731896e-07, + "loss": 0.0363, + "reward": 0.8003920987248421, + "reward_std": 0.955727644264698, + "step": 120 + }, + { + "clip_fraction": 0.0, + "completion_length": 2489.1875, + "dapo/avg_reward_std": 0.1615937834694272, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.22222222600664412, + "dapo/num_sampling_attempts": 5.25, + "dapo/sampling_efficiency": 37.41987179487179, + "dapo/total_prompts_processed": 31.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.1382857142857143, + "grad_norm": 0.027044769376516342, + "kl": 2.0619481801986694e-05, + "learning_rate": 4.7185832004988133e-07, + "loss": 0.0123, + "reward": 0.5692465994507074, + "reward_std": 0.9356264397501945, + "step": 121 + }, + { + "clip_fraction": 0.0, + "completion_length": 2946.687530517578, + "dapo/avg_reward_std": 0.26767816713878084, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3452381023338863, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 33.75, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.13942857142857143, + "grad_norm": 0.03187067061662674, + "kl": 2.1383166313171387e-05, + "learning_rate": 4.641359520805548e-07, + "loss": 0.0722, 
+ "reward": 0.42231168132275343, + "reward_std": 0.9001481607556343, + "step": 122 + }, + { + "clip_fraction": 0.0, + "completion_length": 1841.1458206176758, + "dapo/avg_reward_std": 0.32384763956069945, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4000000065565109, + "dapo/num_sampling_attempts": 3.125, + "dapo/sampling_efficiency": 41.041666666666664, + "dapo/total_prompts_processed": 18.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.14057142857142857, + "grad_norm": 0.03784916177392006, + "kl": 4.2632222175598145e-05, + "learning_rate": 4.5643973913200837e-07, + "loss": 0.0367, + "reward": 0.6476083844900131, + "reward_std": 0.908843033015728, + "step": 123 + }, + { + "clip_fraction": 0.0, + "completion_length": 2392.166702270508, + "dapo/avg_reward_std": 0.26674444922085466, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3218390869683233, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 31.666666666666664, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.1417142857142857, + "grad_norm": 0.02941369265317917, + "kl": 2.299714833498001e-05, + "learning_rate": 4.4877202554526084e-07, + "loss": 0.0152, + "reward": 0.5824479665607214, + "reward_std": 0.9478363320231438, + "step": 124 + }, + { + "clip_fraction": 0.0, + "completion_length": 3125.159713745117, + "dapo/avg_reward_std": 0.29309388995170593, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.5000000049670538, + "dapo/num_sampling_attempts": 2.625, + "dapo/sampling_efficiency": 51.45833333333333, + "dapo/total_prompts_processed": 15.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.14285714285714285, + "grad_norm": 0.030095171183347702, + "kl": 3.2413750886917114e-05, + "learning_rate": 4.4113514698014953e-07, + "loss": 0.0534, + "reward": 0.5003506469074637, + "reward_std": 0.8919698372483253, + "step": 125 + }, + { + "clip_fraction": 0.0, + "completion_length": 
2462.8368377685547, + "dapo/avg_reward_std": 0.2680182981491089, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3466666728258133, + "dapo/num_sampling_attempts": 3.125, + "dapo/sampling_efficiency": 46.87499999999999, + "dapo/total_prompts_processed": 18.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.144, + "grad_norm": 0.04286734014749527, + "kl": 5.683675408363342e-05, + "learning_rate": 4.3353142970386557e-07, + "loss": 0.0028, + "reward": 0.5951744802296162, + "reward_std": 0.9584252312779427, + "step": 126 + }, + { + "clip_fraction": 0.0, + "completion_length": 2443.4618225097656, + "dapo/avg_reward_std": 0.19895405417833573, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2820512862541737, + "dapo/num_sampling_attempts": 4.875, + "dapo/sampling_efficiency": 33.90376984126984, + "dapo/total_prompts_processed": 29.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.14514285714285713, + "grad_norm": 0.03486345708370209, + "kl": 2.958625555038452e-05, + "learning_rate": 4.2596318988235037e-07, + "loss": -0.0055, + "reward": 0.7111770529299974, + "reward_std": 0.9570346251130104, + "step": 127 + }, + { + "clip_fraction": 0.0, + "completion_length": 2227.385452270508, + "dapo/avg_reward_std": 0.22934340153421676, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33333333688122885, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 52.291666666666664, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.1462857142857143, + "grad_norm": 0.04721139743924141, + "kl": 3.547314554452896e-05, + "learning_rate": 4.1843273287476854e-07, + "loss": 0.1085, + "reward": 0.4447980001568794, + "reward_std": 0.951726958155632, + "step": 128 + }, + { + "clip_fraction": 0.0, + "completion_length": 2883.357681274414, + "dapo/avg_reward_std": 0.4109063148498535, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.6777777880430221, + 
"dapo/num_sampling_attempts": 1.875, + "dapo/sampling_efficiency": 65.625, + "dapo/total_prompts_processed": 11.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.14742857142857144, + "grad_norm": 0.02544778771698475, + "kl": 9.082257747650146e-06, + "learning_rate": 4.1094235253127374e-07, + "loss": 0.046, + "reward": 0.6885830331593752, + "reward_std": 0.9739237055182457, + "step": 129 + }, + { + "clip_fraction": 0.0, + "completion_length": 2122.795181274414, + "dapo/avg_reward_std": 0.2591241377371329, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3641975356472863, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 39.70238095238095, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.14857142857142858, + "grad_norm": 0.03150525689125061, + "kl": 3.223586827516556e-05, + "learning_rate": 4.034943304942796e-07, + "loss": 0.0306, + "reward": 0.5525269485078752, + "reward_std": 0.9417792037129402, + "step": 130 + }, + { + "clip_fraction": 0.0, + "completion_length": 2306.8611450195312, + "dapo/avg_reward_std": 0.3414611066209859, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3908046078065346, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 32.410714285714285, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.14971428571428572, + "grad_norm": 0.036385975778102875, + "kl": 4.038959741592407e-05, + "learning_rate": 3.9609093550344907e-07, + "loss": 0.0679, + "reward": 0.5595943983644247, + "reward_std": 0.9294908344745636, + "step": 131 + }, + { + "clip_fraction": 0.0, + "completion_length": 2100.4444694519043, + "dapo/avg_reward_std": 0.22894747753938038, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.34444445222616193, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 38.541666666666664, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, 
+ "epoch": 0.15085714285714286, + "grad_norm": 0.05820675194263458, + "kl": 7.29486346244812e-05, + "learning_rate": 3.8873442270461485e-07, + "loss": 0.0548, + "reward": 0.5259249797090888, + "reward_std": 0.9095494002103806, + "step": 132 + }, + { + "clip_fraction": 0.0, + "completion_length": 2399.0555725097656, + "dapo/avg_reward_std": 0.2968884447346563, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4057971057684525, + "dapo/num_sampling_attempts": 2.875, + "dapo/sampling_efficiency": 48.33333333333333, + "dapo/total_prompts_processed": 17.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.152, + "grad_norm": 0.03143748641014099, + "kl": 1.6003847122192383e-05, + "learning_rate": 3.8142703296283953e-07, + "loss": 0.0154, + "reward": 0.6293735019862652, + "reward_std": 0.9267243668437004, + "step": 133 + }, + { + "clip_fraction": 0.0, + "completion_length": 2028.9653091430664, + "dapo/avg_reward_std": 0.24916886538267136, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4097222276031971, + "dapo/num_sampling_attempts": 3.0, + "dapo/sampling_efficiency": 45.83333333333333, + "dapo/total_prompts_processed": 18.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.15314285714285714, + "grad_norm": 0.03667714074254036, + "kl": 2.6845373213291168e-05, + "learning_rate": 3.7417099217982686e-07, + "loss": 0.0108, + "reward": 0.6901863785460591, + "reward_std": 0.9471788480877876, + "step": 134 + }, + { + "clip_fraction": 0.0, + "completion_length": 2116.6493225097656, + "dapo/avg_reward_std": 0.3074521411742483, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.37500000638621195, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 33.035714285714285, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.15428571428571428, + "grad_norm": 0.04016295075416565, + "kl": 4.020519554615021e-05, + "learning_rate": 3.6696851061588994e-07, + "loss": 0.081, + "reward": 
0.6064621905097738, + "reward_std": 0.9165264815092087, + "step": 135 + }, + { + "clip_fraction": 0.0, + "completion_length": 2051.2812728881836, + "dapo/avg_reward_std": 0.20643932349754102, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2979798059571873, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 49.26136363636363, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.15542857142857142, + "grad_norm": 0.03907117620110512, + "kl": 4.081428050994873e-05, + "learning_rate": 3.5982178221668533e-07, + "loss": 0.0631, + "reward": 0.6007686145603657, + "reward_std": 0.946811780333519, + "step": 136 + }, + { + "clip_fraction": 0.0, + "completion_length": 2981.6145935058594, + "dapo/avg_reward_std": 0.17673770231860025, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.26190476829097387, + "dapo/num_sampling_attempts": 5.25, + "dapo/sampling_efficiency": 33.19444444444444, + "dapo/total_prompts_processed": 31.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.15657142857142858, + "grad_norm": 0.026764124631881714, + "kl": 2.1813437342643738e-05, + "learning_rate": 3.5273298394491515e-07, + "loss": 0.0296, + "reward": 0.5422612819820642, + "reward_std": 0.9660339280962944, + "step": 137 + }, + { + "clip_fraction": 0.0, + "completion_length": 1996.4930725097656, + "dapo/avg_reward_std": 0.2211539367834727, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.35000000447034835, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 41.666666666666664, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.15771428571428572, + "grad_norm": 0.036459192633628845, + "kl": 6.0535967350006104e-05, + "learning_rate": 3.45704275117204e-07, + "loss": 0.0473, + "reward": 0.6352426074445248, + "reward_std": 1.0075769945979118, + "step": 138 + }, + { + "clip_fraction": 0.0, + "completion_length": 2673.013931274414, + 
"dapo/avg_reward_std": 0.21187836019431844, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.28431372738936367, + "dapo/num_sampling_attempts": 4.25, + "dapo/sampling_efficiency": 40.347222222222214, + "dapo/total_prompts_processed": 25.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.15885714285714286, + "grad_norm": 0.027443382889032364, + "kl": 4.770606756210327e-05, + "learning_rate": 3.387377967463493e-07, + "loss": 0.0398, + "reward": 0.53852697648108, + "reward_std": 0.9717471078038216, + "step": 139 + }, + { + "clip_fraction": 0.0, + "completion_length": 2352.944465637207, + "dapo/avg_reward_std": 0.28073156496574136, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33908046936166697, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 31.666666666666664, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.16, + "grad_norm": 0.03219648823142052, + "kl": 1.9827857613563538e-05, + "learning_rate": 3.3183567088914833e-07, + "loss": 0.0502, + "reward": 0.5767329391092062, + "reward_std": 0.920682892203331, + "step": 140 + }, + { + "clip_fraction": 0.0, + "completion_length": 2714.9097595214844, + "dapo/avg_reward_std": 0.17997434735298157, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.26495726979695833, + "dapo/num_sampling_attempts": 4.875, + "dapo/sampling_efficiency": 24.82142857142857, + "dapo/total_prompts_processed": 29.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.16114285714285714, + "grad_norm": 0.03654953092336655, + "kl": 2.0893290638923645e-05, + "learning_rate": 3.250000000000001e-07, + "loss": 0.0808, + "reward": 0.7222395315766335, + "reward_std": 0.9689760208129883, + "step": 141 + }, + { + "clip_fraction": 0.0, + "completion_length": 1895.9965209960938, + "dapo/avg_reward_std": 0.24079040033476692, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.30476190788405283, + "dapo/num_sampling_attempts": 
4.375, + "dapo/sampling_efficiency": 36.67207792207792, + "dapo/total_prompts_processed": 26.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.16228571428571428, + "grad_norm": 0.05263448879122734, + "kl": 8.018314838409424e-05, + "learning_rate": 3.182328662904756e-07, + "loss": 0.0952, + "reward": 0.5266689900308847, + "reward_std": 0.9142153859138489, + "step": 142 + }, + { + "clip_fraction": 0.0, + "completion_length": 2619.2291717529297, + "dapo/avg_reward_std": 0.2643248688790106, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.34408602887584316, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 32.410714285714285, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.16342857142857142, + "grad_norm": 0.029158689081668854, + "kl": 3.154575824737549e-05, + "learning_rate": 3.115363310950578e-07, + "loss": 0.0032, + "reward": 0.5475870370864868, + "reward_std": 0.8940814658999443, + "step": 143 + }, + { + "clip_fraction": 0.0, + "completion_length": 2439.340316772461, + "dapo/avg_reward_std": 0.25194550690979794, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33908046576483497, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 48.86904761904761, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.16457142857142856, + "grad_norm": 0.027842765673995018, + "kl": 4.0609389543533325e-05, + "learning_rate": 3.0491243424323783e-07, + "loss": 0.0, + "reward": 0.6661859937012196, + "reward_std": 0.9778606072068214, + "step": 144 + }, + { + "clip_fraction": 0.0, + "completion_length": 2299.4166870117188, + "dapo/avg_reward_std": 0.19899881369358785, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2567567603813635, + "dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 27.96626984126984, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 
0.1657142857142857, + "grad_norm": 0.041895266622304916, + "kl": 6.861239671707153e-05, + "learning_rate": 2.9836319343816397e-07, + "loss": 0.1109, + "reward": 0.6072739865630865, + "reward_std": 0.9706787243485451, + "step": 145 + }, + { + "clip_fraction": 0.0, + "completion_length": 2448.3993225097656, + "dapo/avg_reward_std": 0.26682727987116034, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4015151573853059, + "dapo/num_sampling_attempts": 2.75, + "dapo/sampling_efficiency": 61.25, + "dapo/total_prompts_processed": 16.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.16685714285714287, + "grad_norm": 0.033113960176706314, + "kl": 6.478279829025269e-05, + "learning_rate": 2.918906036420294e-07, + "loss": -0.0725, + "reward": 0.7111451979726553, + "reward_std": 0.9747665524482727, + "step": 146 + }, + { + "clip_fraction": 0.0, + "completion_length": 2499.4132080078125, + "dapo/avg_reward_std": 0.23725970940930502, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.36904762791735785, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 40.972222222222214, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.168, + "grad_norm": 0.03699960932135582, + "kl": 5.050189793109894e-05, + "learning_rate": 2.854966364683872e-07, + "loss": 0.0512, + "reward": 0.5902281412854791, + "reward_std": 0.9745439067482948, + "step": 147 + }, + { + "clip_fraction": 0.0, + "completion_length": 2606.902816772461, + "dapo/avg_reward_std": 0.3174622275612571, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.46212122250686993, + "dapo/num_sampling_attempts": 2.75, + "dapo/sampling_efficiency": 50.416666666666664, + "dapo/total_prompts_processed": 16.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.16914285714285715, + "grad_norm": 0.032203614711761475, + "kl": 3.288034349679947e-05, + "learning_rate": 2.791832395815782e-07, + "loss": 0.0183, + "reward": 0.4769565463066101, + 
"reward_std": 0.9322275221347809, + "step": 148 + }, + { + "clip_fraction": 0.0, + "completion_length": 2815.8160247802734, + "dapo/avg_reward_std": 0.2469456638350631, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2979798046025363, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 35.11904761904762, + "dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.1702857142857143, + "grad_norm": 0.030444171279668808, + "kl": 3.5978853702545166e-05, + "learning_rate": 2.729523361034538e-07, + "loss": 0.056, + "reward": 0.6807443965226412, + "reward_std": 0.9815046414732933, + "step": 149 + }, + { + "clip_fraction": 0.0, + "completion_length": 2225.520866394043, + "dapo/avg_reward_std": 0.19231303450134066, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2777777844005161, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 32.18749999999999, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.17142857142857143, + "grad_norm": 0.03868250176310539, + "kl": 4.6514905989170074e-05, + "learning_rate": 2.6680582402757324e-07, + "loss": -0.037, + "reward": 0.6887061549350619, + "reward_std": 0.9610730484127998, + "step": 150 + }, + { + "clip_fraction": 0.0, + "completion_length": 3103.3784790039062, + "dapo/avg_reward_std": 0.20304633464132035, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.31547619295971735, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 40.32738095238095, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.17257142857142857, + "grad_norm": 0.03259337320923805, + "kl": 7.005780935287476e-05, + "learning_rate": 2.6074557564105724e-07, + "loss": 0.0659, + "reward": 0.5518668536096811, + "reward_std": 0.9462934136390686, + "step": 151 + }, + { + "clip_fraction": 0.0, + "completion_length": 2488.499984741211, + "dapo/avg_reward_std": 
0.20882706064730883, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3177083367481828, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 39.409722222222214, + "dapo/total_prompts_processed": 24.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.1737142857142857, + "grad_norm": 0.030666321516036987, + "kl": 3.533810377120972e-05, + "learning_rate": 2.547734369542718e-07, + "loss": 0.0437, + "reward": 0.5291262120008469, + "reward_std": 0.981982946395874, + "step": 152 + }, + { + "clip_fraction": 0.0, + "completion_length": 2514.8507080078125, + "dapo/avg_reward_std": 0.20546393813910308, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3209876600239012, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 35.93749999999999, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.17485714285714285, + "grad_norm": 0.028674930334091187, + "kl": 7.952749729156494e-05, + "learning_rate": 2.488912271385139e-07, + "loss": -0.0145, + "reward": 0.5828098729252815, + "reward_std": 0.9706256464123726, + "step": 153 + }, + { + "clip_fraction": 0.0, + "completion_length": 2717.2847290039062, + "dapo/avg_reward_std": 0.25499844749768574, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.36666667511065804, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 34.791666666666664, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.176, + "grad_norm": 0.030772393569350243, + "kl": 4.854763392359018e-05, + "learning_rate": 2.4310073797187573e-07, + "loss": 0.0426, + "reward": 0.45278373593464494, + "reward_std": 0.9311749711632729, + "step": 154 + }, + { + "clip_fraction": 0.0, + "completion_length": 2762.3055725097656, + "dapo/avg_reward_std": 0.29779375117758045, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3985507280930229, + "dapo/num_sampling_attempts": 2.875, + 
"dapo/sampling_efficiency": 46.25, + "dapo/total_prompts_processed": 17.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.17714285714285713, + "grad_norm": 0.02795676700770855, + "kl": 6.116554141044617e-05, + "learning_rate": 2.374037332934512e-07, + "loss": -0.017, + "reward": 0.5571175646036863, + "reward_std": 0.951450802385807, + "step": 155 + }, + { + "clip_fraction": 0.0, + "completion_length": 2260.506950378418, + "dapo/avg_reward_std": 0.19260793987740862, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.20921986375717408, + "dapo/num_sampling_attempts": 5.875, + "dapo/sampling_efficiency": 20.416666666666664, + "dapo/total_prompts_processed": 35.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.1782857142857143, + "grad_norm": 0.03577401861548424, + "kl": 4.409998655319214e-05, + "learning_rate": 2.3180194846605364e-07, + "loss": 0.0769, + "reward": 0.6440617088228464, + "reward_std": 0.9337564334273338, + "step": 156 + }, + { + "clip_fraction": 0.0, + "completion_length": 2340.84725189209, + "dapo/avg_reward_std": 0.27447891732056934, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.40972222946584225, + "dapo/num_sampling_attempts": 3.0, + "dapo/sampling_efficiency": 46.87499999999999, + "dapo/total_prompts_processed": 18.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.17942857142857144, + "grad_norm": 0.045233093202114105, + "kl": 6.485730409622192e-05, + "learning_rate": 2.2629708984760706e-07, + "loss": 0.0363, + "reward": 0.7273098900914192, + "reward_std": 0.9823846518993378, + "step": 157 + }, + { + "clip_fraction": 0.0, + "completion_length": 2282.3819580078125, + "dapo/avg_reward_std": 0.20623917956101268, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.31140351334684774, + "dapo/num_sampling_attempts": 4.75, + "dapo/sampling_efficiency": 23.680555555555557, + "dapo/total_prompts_processed": 28.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.18057142857142858, + 
"grad_norm": 0.02890234813094139, + "kl": 5.996227264404297e-05, + "learning_rate": 2.2089083427137329e-07, + "loss": 0.0031, + "reward": 0.6950137317180634, + "reward_std": 0.9464666321873665, + "step": 158 + }, + { + "clip_fraction": 0.0, + "completion_length": 2021.6284866333008, + "dapo/avg_reward_std": 0.23576846316054062, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3198198257265864, + "dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 26.96969696969697, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.18171428571428572, + "grad_norm": 0.03477742150425911, + "kl": 6.712228059768677e-05, + "learning_rate": 2.1558482853517253e-07, + "loss": 0.0402, + "reward": 0.5178025495260954, + "reward_std": 0.9177478551864624, + "step": 159 + }, + { + "clip_fraction": 0.0, + "completion_length": 2372.9931030273438, + "dapo/avg_reward_std": 0.1955654670794805, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.24206349643922986, + "dapo/num_sampling_attempts": 5.25, + "dapo/sampling_efficiency": 22.916666666666664, + "dapo/total_prompts_processed": 31.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.18285714285714286, + "grad_norm": 0.03023899346590042, + "kl": 0.00011706352233886719, + "learning_rate": 2.1038068889975259e-07, + "loss": -0.023, + "reward": 0.5155377965420485, + "reward_std": 0.9538168758153915, + "step": 160 + }, + { + "clip_fraction": 0.0, + "completion_length": 2786.184097290039, + "dapo/avg_reward_std": 0.22358988050152273, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3039215772467501, + "dapo/num_sampling_attempts": 4.25, + "dapo/sampling_efficiency": 36.354166666666664, + "dapo/total_prompts_processed": 25.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.184, + "grad_norm": 0.029065359383821487, + "kl": 7.36340880393982e-05, + "learning_rate": 2.0528000059645995e-07, + "loss": 0.0183, + "reward": 0.5675038225017488, + 
"reward_std": 0.9294460043311119, + "step": 161 + }, + { + "clip_fraction": 0.0, + "completion_length": 2661.7986183166504, + "dapo/avg_reward_std": 0.23443660909129727, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3494623740834574, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 39.166666666666664, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.18514285714285714, + "grad_norm": 0.03428042680025101, + "kl": 7.835030555725098e-05, + "learning_rate": 2.0028431734436308e-07, + "loss": 0.0077, + "reward": 0.6459280159324408, + "reward_std": 0.961892195045948, + "step": 162 + }, + { + "clip_fraction": 0.0, + "completion_length": 2645.9305725097656, + "dapo/avg_reward_std": 0.2903378981611003, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.39855073133240576, + "dapo/num_sampling_attempts": 2.875, + "dapo/sampling_efficiency": 54.166666666666664, + "dapo/total_prompts_processed": 17.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.18628571428571428, + "grad_norm": 0.026776015758514404, + "kl": 6.175786256790161e-05, + "learning_rate": 1.9539516087697517e-07, + "loss": 0.0499, + "reward": 0.834372952580452, + "reward_std": 0.9364972710609436, + "step": 163 + }, + { + "clip_fraction": 0.0, + "completion_length": 2940.7604370117188, + "dapo/avg_reward_std": 0.28692422310511273, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.35555556217829387, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 32.708333333333336, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.18742857142857142, + "grad_norm": 0.03140675649046898, + "kl": 6.527453660964966e-05, + "learning_rate": 1.9061402047871833e-07, + "loss": 0.074, + "reward": 0.41690353071317077, + "reward_std": 0.9491114094853401, + "step": 164 + }, + { + "clip_fraction": 0.0, + "completion_length": 2281.6284675598145, + "dapo/avg_reward_std": 
0.19226541501634262, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.28921569007284503, + "dapo/num_sampling_attempts": 4.25, + "dapo/sampling_efficiency": 34.285714285714285, + "dapo/total_prompts_processed": 25.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.18857142857142858, + "grad_norm": 0.044475626200437546, + "kl": 6.622821092605591e-05, + "learning_rate": 1.8594235253127372e-07, + "loss": 0.0216, + "reward": 0.5352295860648155, + "reward_std": 0.9716188460588455, + "step": 165 + }, + { + "clip_fraction": 0.0, + "completion_length": 2246.774314880371, + "dapo/avg_reward_std": 0.21395914729048565, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2642276446993758, + "dapo/num_sampling_attempts": 5.125, + "dapo/sampling_efficiency": 32.51488095238095, + "dapo/total_prompts_processed": 30.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.18971428571428572, + "grad_norm": 0.03659826144576073, + "kl": 7.368624210357666e-05, + "learning_rate": 1.8138158006995363e-07, + "loss": 0.0485, + "reward": 0.5606641564518213, + "reward_std": 0.9496459811925888, + "step": 166 + }, + { + "clip_fraction": 0.0, + "completion_length": 2340.156265258789, + "dapo/avg_reward_std": 0.2663822333017985, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3888888974984487, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 30.32738095238095, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.19085714285714286, + "grad_norm": 0.03370486944913864, + "kl": 0.00011890754103660583, + "learning_rate": 1.7693309235023127e-07, + "loss": 0.0107, + "reward": 0.615155003964901, + "reward_std": 0.981718622148037, + "step": 167 + }, + { + "clip_fraction": 0.0, + "completion_length": 1600.381950378418, + "dapo/avg_reward_std": 0.2149174999859598, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.31481481964389485, + "dapo/num_sampling_attempts": 4.5, + 
"dapo/sampling_efficiency": 39.30555555555556, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.192, + "grad_norm": 0.040477264672517776, + "kl": 4.4405460357666016e-05, + "learning_rate": 1.7259824442455923e-07, + "loss": 0.0183, + "reward": 0.7775004804134369, + "reward_std": 0.9218784719705582, + "step": 168 + }, + { + "clip_fraction": 0.0, + "completion_length": 2663.3229370117188, + "dapo/avg_reward_std": 0.29243687472560187, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4242424314672297, + "dapo/num_sampling_attempts": 2.75, + "dapo/sampling_efficiency": 45.20833333333333, + "dapo/total_prompts_processed": 16.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.19314285714285714, + "grad_norm": 0.033447615802288055, + "kl": 6.474554538726807e-05, + "learning_rate": 1.6837835672960831e-07, + "loss": 0.0604, + "reward": 0.6684309486299753, + "reward_std": 0.9398416355252266, + "step": 169 + }, + { + "clip_fraction": 0.0, + "completion_length": 1823.3020782470703, + "dapo/avg_reward_std": 0.19836447931624748, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.24324324847878637, + "dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 33.229166666666664, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.19428571428571428, + "grad_norm": 0.050460852682590485, + "kl": 8.266419172286987e-05, + "learning_rate": 1.6427471468404952e-07, + "loss": 0.0797, + "reward": 0.6385768353939056, + "reward_std": 0.9705075472593307, + "step": 170 + }, + { + "clip_fraction": 0.0, + "completion_length": 2620.312515258789, + "dapo/avg_reward_std": 0.2494219935992185, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.29901961412499933, + "dapo/num_sampling_attempts": 4.25, + "dapo/sampling_efficiency": 30.3125, + "dapo/total_prompts_processed": 25.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.19542857142857142, + "grad_norm": 
0.030058681964874268, + "kl": 5.9291720390319824e-05, + "learning_rate": 1.6028856829700258e-07, + "loss": 0.04, + "reward": 0.5667276866734028, + "reward_std": 0.9310731589794159, + "step": 171 + }, + { + "clip_fraction": 0.0, + "completion_length": 2728.118064880371, + "dapo/avg_reward_std": 0.3154246766458858, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.47727273540063336, + "dapo/num_sampling_attempts": 2.75, + "dapo/sampling_efficiency": 52.82738095238095, + "dapo/total_prompts_processed": 16.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.19657142857142856, + "grad_norm": 0.02854626253247261, + "kl": 4.601478576660156e-05, + "learning_rate": 1.5642113178727193e-07, + "loss": -0.0071, + "reward": 0.5269420258700848, + "reward_std": 0.9420886114239693, + "step": 172 + }, + { + "clip_fraction": 0.0, + "completion_length": 2000.6111297607422, + "dapo/avg_reward_std": 0.1943835632221119, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.2657657728807346, + "dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 38.02083333333333, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.1977142857142857, + "grad_norm": 0.033435527235269547, + "kl": 6.041303277015686e-05, + "learning_rate": 1.5267358321348285e-07, + "loss": -0.0116, + "reward": 0.6523085497319698, + "reward_std": 0.9166425243020058, + "step": 173 + }, + { + "clip_fraction": 0.0, + "completion_length": 2643.138916015625, + "dapo/avg_reward_std": 0.31710357325417654, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.46031746977851506, + "dapo/num_sampling_attempts": 2.625, + "dapo/sampling_efficiency": 51.45833333333333, + "dapo/total_prompts_processed": 15.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.19885714285714284, + "grad_norm": 0.02673209458589554, + "kl": 0.00010142475366592407, + "learning_rate": 1.4904706411523448e-07, + "loss": 0.0252, + "reward": 0.5322555489838123, + 
"reward_std": 0.9057421013712883, + "step": 174 + }, + { + "clip_fraction": 0.0, + "completion_length": 2441.3437576293945, + "dapo/avg_reward_std": 0.30628569194903743, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.38461538977347887, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 41.041666666666664, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.2, + "grad_norm": 0.04055117443203926, + "kl": 4.247203469276428e-05, + "learning_rate": 1.4554267916537495e-07, + "loss": 0.0974, + "reward": 0.6256343480199575, + "reward_std": 0.9141717404127121, + "step": 175 + }, + { + "clip_fraction": 0.0, + "completion_length": 2001.5173797607422, + "dapo/avg_reward_std": 0.28915207616744504, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3817204381189039, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 29.285714285714285, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.20114285714285715, + "grad_norm": 0.03139885142445564, + "kl": 8.495151996612549e-05, + "learning_rate": 1.4216149583350755e-07, + "loss": 0.0178, + "reward": 0.5467482833191752, + "reward_std": 0.9077746942639351, + "step": 176 + }, + { + "clip_fraction": 0.0, + "completion_length": 2707.1875, + "dapo/avg_reward_std": 0.2716821462943636, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3620689732247385, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 37.5, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.2022857142857143, + "grad_norm": 0.027195578441023827, + "kl": 3.4984201192855835e-05, + "learning_rate": 1.3890454406082956e-07, + "loss": 0.0243, + "reward": 0.4738291520625353, + "reward_std": 0.9582962840795517, + "step": 177 + }, + { + "clip_fraction": 0.0, + "completion_length": 2927.2534790039062, + "dapo/avg_reward_std": 0.2845180779695511, + 
"dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3750000127724239, + "dapo/num_sampling_attempts": 3.5, + "dapo/sampling_efficiency": 34.49404761904761, + "dapo/total_prompts_processed": 21.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.20342857142857143, + "grad_norm": 0.0315893292427063, + "kl": 9.309500455856323e-05, + "learning_rate": 1.3577281594640182e-07, + "loss": 0.067, + "reward": 0.52550208568573, + "reward_std": 0.9910342618823051, + "step": 178 + }, + { + "clip_fraction": 0.0, + "completion_length": 2337.701400756836, + "dapo/avg_reward_std": 0.18291032314300537, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.25438597092502996, + "dapo/num_sampling_attempts": 4.75, + "dapo/sampling_efficiency": 32.81249999999999, + "dapo/total_prompts_processed": 28.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.20457142857142857, + "grad_norm": 0.031005509197711945, + "kl": 9.676814079284668e-05, + "learning_rate": 1.3276726544494571e-07, + "loss": 0.0165, + "reward": 0.6187671273946762, + "reward_std": 0.9665273353457451, + "step": 179 + }, + { + "clip_fraction": 0.0, + "completion_length": 2257.3958892822266, + "dapo/avg_reward_std": 0.20009312199221718, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.30092593158284825, + "dapo/num_sampling_attempts": 4.5, + "dapo/sampling_efficiency": 40.95238095238095, + "dapo/total_prompts_processed": 27.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.2057142857142857, + "grad_norm": 0.0394003801047802, + "kl": 6.996467709541321e-05, + "learning_rate": 1.2988880807625927e-07, + "loss": 0.0627, + "reward": 0.7572303153574467, + "reward_std": 0.9510952234268188, + "step": 180 + }, + { + "clip_fraction": 0.0, + "completion_length": 2533.9375534057617, + "dapo/avg_reward_std": 0.37206994990507763, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.5740740895271301, + "dapo/num_sampling_attempts": 2.25, + "dapo/sampling_efficiency": 
51.041666666666664, + "dapo/total_prompts_processed": 13.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.20685714285714285, + "grad_norm": 0.03264293819665909, + "kl": 4.2844563722610474e-05, + "learning_rate": 1.2713832064634125e-07, + "loss": 0.0513, + "reward": 0.7092031128704548, + "reward_std": 1.0104939341545105, + "step": 181 + }, + { + "clip_fraction": 0.0, + "completion_length": 2425.8055572509766, + "dapo/avg_reward_std": 0.275991202547, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.384615390919722, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 41.041666666666664, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.208, + "grad_norm": 0.033197954297065735, + "kl": 6.585032679140568e-05, + "learning_rate": 1.2451664098030743e-07, + "loss": 0.0327, + "reward": 0.5725661776959896, + "reward_std": 0.9082557633519173, + "step": 182 + }, + { + "clip_fraction": 0.0, + "completion_length": 2288.0799255371094, + "dapo/avg_reward_std": 0.31956043162129144, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4318181872367859, + "dapo/num_sampling_attempts": 2.75, + "dapo/sampling_efficiency": 52.083333333333336, + "dapo/total_prompts_processed": 16.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.20914285714285713, + "grad_norm": 0.0300610288977623, + "kl": 9.128451347351074e-05, + "learning_rate": 1.220245676671809e-07, + "loss": 0.0567, + "reward": 0.7111962893977761, + "reward_std": 0.9172193482518196, + "step": 183 + }, + { + "clip_fraction": 0.0, + "completion_length": 2212.138900756836, + "dapo/avg_reward_std": 0.31106447339057924, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4066666769981384, + "dapo/num_sampling_attempts": 3.125, + "dapo/sampling_efficiency": 40.97222222222222, + "dapo/total_prompts_processed": 18.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.2102857142857143, + "grad_norm": 0.03711786866188049, + "kl": 
9.056925773620605e-05, + "learning_rate": 1.1966285981663407e-07, + "loss": 0.0405, + "reward": 0.505124656483531, + "reward_std": 0.9274496361613274, + "step": 184 + }, + { + "clip_fraction": 0.0, + "completion_length": 2350.8820037841797, + "dapo/avg_reward_std": 0.21689824704770688, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3209876600239012, + "dapo/num_sampling_attempts": 3.375, + "dapo/sampling_efficiency": 47.22222222222222, + "dapo/total_prompts_processed": 20.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.21142857142857144, + "grad_norm": 0.03581295162439346, + "kl": 0.00011820532381534576, + "learning_rate": 1.1743223682775649e-07, + "loss": 0.0582, + "reward": 0.6189532484859228, + "reward_std": 0.92426348477602, + "step": 185 + }, + { + "clip_fraction": 0.0, + "completion_length": 2414.6770629882812, + "dapo/avg_reward_std": 0.26570350316263014, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33333333749924937, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 41.785714285714285, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.21257142857142858, + "grad_norm": 0.04000677913427353, + "kl": 5.166977643966675e-05, + "learning_rate": 1.1533337816991931e-07, + "loss": 0.0842, + "reward": 0.6384202986955643, + "reward_std": 0.9535242542624474, + "step": 186 + }, + { + "clip_fraction": 0.0, + "completion_length": 2179.180564880371, + "dapo/avg_reward_std": 0.267340756695846, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.362068974766238, + "dapo/num_sampling_attempts": 3.625, + "dapo/sampling_efficiency": 31.25, + "dapo/total_prompts_processed": 21.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.21371428571428572, + "grad_norm": 0.03956381976604462, + "kl": 7.00727105140686e-05, + "learning_rate": 1.1336692317580158e-07, + "loss": 0.0838, + "reward": 0.6583898914977908, + "reward_std": 0.9566742405295372, + "step": 187 + 
}, + { + "clip_fraction": 0.0, + "completion_length": 2340.65975189209, + "dapo/avg_reward_std": 0.19622711837291718, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.31770833721384406, + "dapo/num_sampling_attempts": 4.0, + "dapo/sampling_efficiency": 55.51136363636363, + "dapo/total_prompts_processed": 24.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.21485714285714286, + "grad_norm": 0.03709344565868378, + "kl": 9.210407733917236e-05, + "learning_rate": 1.1153347084664419e-07, + "loss": 0.0542, + "reward": 0.5126780550926924, + "reward_std": 0.9266727864742279, + "step": 188 + }, + { + "clip_fraction": 0.0, + "completion_length": 3183.7395782470703, + "dapo/avg_reward_std": 0.19985724004303537, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.23577236293292628, + "dapo/num_sampling_attempts": 5.125, + "dapo/sampling_efficiency": 23.1547619047619, + "dapo/total_prompts_processed": 30.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.216, + "grad_norm": 0.025569448247551918, + "kl": 3.505079075694084e-05, + "learning_rate": 1.0983357966978745e-07, + "loss": 0.0446, + "reward": 0.524140851572156, + "reward_std": 0.9313696026802063, + "step": 189 + }, + { + "clip_fraction": 0.0, + "completion_length": 2137.0764083862305, + "dapo/avg_reward_std": 0.2310014808177948, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.33333334028720857, + "dapo/num_sampling_attempts": 3.125, + "dapo/sampling_efficiency": 49.479166666666664, + "dapo/total_prompts_processed": 18.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.21714285714285714, + "grad_norm": 0.049144402146339417, + "kl": 0.00011414289474487305, + "learning_rate": 1.0826776744855121e-07, + "loss": 0.0597, + "reward": 0.6003488898277283, + "reward_std": 0.9967769384384155, + "step": 190 + }, + { + "clip_fraction": 0.0, + "completion_length": 2711.965301513672, + "dapo/avg_reward_std": 0.27090639670689903, + "dapo/filter_reward_index": 0.0, + 
"dapo/kept_prompts_ratio": 0.3388888930281003, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 42.604166666666664, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.21828571428571428, + "grad_norm": 0.03207146376371384, + "kl": 7.285922765731812e-05, + "learning_rate": 1.068365111445064e-07, + "loss": 0.0774, + "reward": 0.5157463289797306, + "reward_std": 0.9445067569613457, + "step": 191 + }, + { + "clip_fraction": 0.0, + "completion_length": 2634.809066772461, + "dapo/avg_reward_std": 0.23276896492854968, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.29729730414377675, + "dapo/num_sampling_attempts": 4.625, + "dapo/sampling_efficiency": 31.38888888888889, + "dapo/total_prompts_processed": 27.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.21942857142857142, + "grad_norm": 0.026157336309552193, + "kl": 4.951097071170807e-05, + "learning_rate": 1.0554024673218806e-07, + "loss": 0.0183, + "reward": 0.4917615167796612, + "reward_std": 0.932147391140461, + "step": 192 + }, + { + "clip_fraction": 0.0, + "completion_length": 2687.6562423706055, + "dapo/avg_reward_std": 0.1842694640159607, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3000000034769376, + "dapo/num_sampling_attempts": 3.75, + "dapo/sampling_efficiency": 37.20238095238095, + "dapo/total_prompts_processed": 22.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.22057142857142858, + "grad_norm": 0.036305345594882965, + "kl": 5.197897553443909e-05, + "learning_rate": 1.0437936906629334e-07, + "loss": 0.0737, + "reward": 0.8177419528365135, + "reward_std": 0.9367102533578873, + "step": 193 + }, + { + "clip_fraction": 0.0, + "completion_length": 2567.093780517578, + "dapo/avg_reward_std": 0.2292217422615398, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.30808081364992895, + "dapo/num_sampling_attempts": 4.125, + "dapo/sampling_efficiency": 36.284722222222214, + 
"dapo/total_prompts_processed": 24.75, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.22171428571428572, + "grad_norm": 0.03788081929087639, + "kl": 8.841603994369507e-05, + "learning_rate": 1.0335423176140511e-07, + "loss": 0.0745, + "reward": 0.4994155182503164, + "reward_std": 0.9395617768168449, + "step": 194 + }, + { + "clip_fraction": 0.0, + "completion_length": 2132.22225189209, + "dapo/avg_reward_std": 0.23152823698136113, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.34408602791447795, + "dapo/num_sampling_attempts": 3.875, + "dapo/sampling_efficiency": 46.800595238095234, + "dapo/total_prompts_processed": 23.25, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.22285714285714286, + "grad_norm": 0.03888849914073944, + "kl": 7.880479097366333e-05, + "learning_rate": 1.0246514708427701e-07, + "loss": 0.0078, + "reward": 0.4982965085655451, + "reward_std": 0.9277759939432144, + "step": 195 + }, + { + "clip_fraction": 0.0, + "completion_length": 2242.8437881469727, + "dapo/avg_reward_std": 0.2252171416031687, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.27192983148913635, + "dapo/num_sampling_attempts": 4.75, + "dapo/sampling_efficiency": 42.49999999999999, + "dapo/total_prompts_processed": 28.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.224, + "grad_norm": 0.03532218188047409, + "kl": 8.079037070274353e-05, + "learning_rate": 1.017123858587145e-07, + "loss": -0.0036, + "reward": 0.6249313289299607, + "reward_std": 0.9415610581636429, + "step": 196 + }, + { + "clip_fraction": 0.0, + "completion_length": 2186.913246154785, + "dapo/avg_reward_std": 0.2062954322287911, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.26754386566187205, + "dapo/num_sampling_attempts": 4.75, + "dapo/sampling_efficiency": 27.549603174603174, + "dapo/total_prompts_processed": 28.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.22514285714285714, + "grad_norm": 0.05644107237458229, + "kl": 
0.00012712180614471436, + "learning_rate": 1.0109617738307911e-07, + "loss": 0.0266, + "reward": 0.6248354203999043, + "reward_std": 0.9687103852629662, + "step": 197 + }, + { + "clip_fraction": 0.0, + "completion_length": 2853.7430725097656, + "dapo/avg_reward_std": 0.2791443226429132, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.41666667277996355, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 45.3125, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.22628571428571428, + "grad_norm": 0.025631451979279518, + "kl": 7.095187902450562e-05, + "learning_rate": 1.0061670936044178e-07, + "loss": 0.0195, + "reward": 0.683892990462482, + "reward_std": 0.9487637504935265, + "step": 198 + }, + { + "clip_fraction": 0.0, + "completion_length": 2660.218780517578, + "dapo/avg_reward_std": 0.24377418825259575, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.3910256469478974, + "dapo/num_sampling_attempts": 3.25, + "dapo/sampling_efficiency": 56.597222222222214, + "dapo/total_prompts_processed": 19.5, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.22742857142857142, + "grad_norm": 0.034018680453300476, + "kl": 6.149709224700928e-05, + "learning_rate": 1.002741278414069e-07, + "loss": 0.0404, + "reward": 0.565577644854784, + "reward_std": 0.9079905152320862, + "step": 199 + }, + { + "clip_fraction": 0.0, + "completion_length": 2421.875015258789, + "dapo/avg_reward_std": 0.3100067762037118, + "dapo/filter_reward_index": 0.0, + "dapo/kept_prompts_ratio": 0.4027777823309104, + "dapo/num_sampling_attempts": 3.0, + "dapo/sampling_efficiency": 45.83333333333333, + "dapo/total_prompts_processed": 18.0, + "dapo/valid_prompts_collected": 6.0, + "epoch": 0.22857142857142856, + "grad_norm": 0.030885452404618263, + "kl": 7.659196853637695e-05, + "learning_rate": 1.0006853717962393e-07, + "loss": 0.0132, + "reward": 0.5110834892839193, + "reward_std": 0.8930082246661186, + "step": 200 + 
}, + { + "epoch": 0.22857142857142856, + "step": 200, + "total_flos": 0.0, + "train_loss": 0.009447227440541611, + "train_runtime": 101500.2967, + "train_samples_per_second": 0.095, + "train_steps_per_second": 0.002 + } + ], + "logging_steps": 1, + "max_steps": 200, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..7c12e17 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f226307e55b90254d38ea4c353a35911a575a170c592310101fe03a927c9dc78 +size 8760