commit 92d21beda9f07d852c2e0c5206d895e814baf4ed Author: ModelHub XC Date: Wed May 6 13:37:50 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: bimabk/test_gin_rummy_qwen_2-5_3B Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..e645074 --- /dev/null +++ b/README.md @@ -0,0 +1,209 @@ +--- +base_model: None +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/cache/models/unsloth--Llama-3.2-3B-Instruct +- grpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000..a16376b --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "down_proj", + "gate_proj", + "q_proj", + "up_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000..46b774e --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25013bdff5adbe26a3cd0e1f5b322cab8ae89bbfa208e14f7ab1d8964aa87b7f +size 194563400 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..1bad6a0 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..bbf879a --- /dev/null +++ b/config.json @@ -0,0 +1,37 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "dtype": "bfloat16", + "eos_token_id": 128009, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pad_token_id": 128004, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "transformers_version": "4.57.5", + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..1fe1bd6 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,5 @@ +{ + "temperature": null, + "top_p": null, + "transformers_version": "4.57.5" +} diff --git a/loss.txt b/loss.txt new file mode 100644 index 0000000..2ccabfc --- /dev/null +++ b/loss.txt @@ -0,0 +1 @@ +75,-0.3484247986227274 \ No newline at end of file diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000..d6896ed --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136a080bca0bb7437e6ef58d47a7f5bbe54217a2fb386207ff9b3eb2d72f6cc0 +size 4965799096 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000..c8ded1e --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b770216613ac5c34d7c54bdff1fa616bc4e338a9d0b20af6303e48c295ee23c +size 1459729952 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..f84d97f --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_parameters": 3212749824, + "total_size": 6425499648 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..3c1d049 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..eccf822 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "left", + "tokenizer_class": "PreTrainedTokenizerFast", + "unk_token": null +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..b2bde1a --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,1954 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.00075, + "eval_steps": 500, + "global_step": 75, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10096.0, + "completions/max_terminated_length": 10096.0, + "completions/mean_length": 8672.71875, + "completions/mean_terminated_length": 8672.71875, + "completions/min_length": 3020.0, + "completions/min_terminated_length": 3020.0, + "entropy": 0.49113161116838455, + "epoch": 1e-05, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.241949200630188, + "kl": 0.0, + "learning_rate": 0.0, + "loss": -0.0633, + "num_tokens": 306152.0, + "reward": -0.4408680200576782, + "reward_std": 0.3989785313606262, + "rewards/rollout_eval_reward_func/mean": 0.11064532399177551, + "rewards/rollout_eval_reward_func/std": 0.21571724116802216, + "rewards/rollout_reward_func/mean": -0.4408680200576782, + "rewards/rollout_reward_func/std": 0.44763946533203125, + "sampling/importance_sampling_ratio/max": 1.2819759845733643, + "sampling/importance_sampling_ratio/mean": 0.9992397427558899, + "sampling/importance_sampling_ratio/min": 0.7715137004852295, + "sampling/sampling_logp_difference/max": 0.2594008445739746, + "sampling/sampling_logp_difference/mean": 0.01546277105808258, + "step": 1, + "step_time": 73.26994180099973 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "entropy": 0.49113161116838455, + "epoch": 2e-05, + "grad_norm": 1.2400784492492676, + "kl": 0.0, + "learning_rate": 2.8571428571428573e-06, + "loss": -0.0633, + "step": 2, + "step_time": 30.109230951999052 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0005208333604969084, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10009.0, + "completions/max_terminated_length": 10009.0, + "completions/mean_length": 7330.1875, + "completions/mean_terminated_length": 7330.1875, + "completions/min_length": 346.0, + "completions/min_terminated_length": 346.0, + "entropy": 0.5131296459585428, + "epoch": 3e-05, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.102152943611145, + "kl": 0.0009028113518070313, + "learning_rate": 5.7142857142857145e-06, + "loss": -0.2347, + "num_tokens": 569740.0, + "reward": -0.48799318075180054, + "reward_std": 0.5598607063293457, + "rewards/rollout_eval_reward_func/mean": 0.22929370403289795, + "rewards/rollout_eval_reward_func/std": 0.26715749502182007, + "rewards/rollout_reward_func/mean": -0.48799318075180054, + "rewards/rollout_reward_func/std": 0.5559459924697876, + "sampling/importance_sampling_ratio/max": 1.2627520561218262, + "sampling/importance_sampling_ratio/mean": 1.0006182193756104, + "sampling/importance_sampling_ratio/min": 0.7627776861190796, + "sampling/sampling_logp_difference/max": 0.27078866958618164, + "sampling/sampling_logp_difference/mean": 0.014230873435735703, + "step": 3, + "step_time": 68.85090976999709 + }, + { + "clip_ratio/high_max": 0.0020833334419876337, + "clip_ratio/high_mean": 0.0010416667209938169, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010416667209938169, + "entropy": 0.5151741988956928, + "epoch": 4e-05, + "grad_norm": 1.0848904848098755, + "kl": 0.0004950130587531021, + "learning_rate": 8.571428571428573e-06, + "loss": -0.2336, + "step": 4, + "step_time": 28.428488818004553 + }, + { + "clip_ratio/high_max": 0.0010416667209938169, + "clip_ratio/high_mean": 0.0005208333604969084, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0010416667209938169, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10323.0, + "completions/max_terminated_length": 10323.0, + "completions/mean_length": 8267.125, + "completions/mean_terminated_length": 8267.125, + "completions/min_length": 1640.0, + "completions/min_terminated_length": 1640.0, + "entropy": 0.5123504158109426, + "epoch": 5e-05, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1476984024047852, + "kl": 0.0007431179510604125, + "learning_rate": 1.1428571428571429e-05, + "loss": -0.0418, + "num_tokens": 862728.0, + "reward": -0.46075016260147095, + "reward_std": 0.5065791606903076, + "rewards/rollout_eval_reward_func/mean": 0.128683939576149, + "rewards/rollout_eval_reward_func/std": 0.2396152913570404, + "rewards/rollout_reward_func/mean": -0.46075016260147095, + "rewards/rollout_reward_func/std": 0.5104123950004578, + "sampling/importance_sampling_ratio/max": 1.3248213529586792, + "sampling/importance_sampling_ratio/mean": 1.0001360177993774, + "sampling/importance_sampling_ratio/min": 0.6914317011833191, + "sampling/sampling_logp_difference/max": 0.3689908981323242, + "sampling/sampling_logp_difference/mean": 0.016226449981331825, + "step": 5, + "step_time": 75.37122915000327 + }, + { + "clip_ratio/high_max": 0.0026041667442768812, + "clip_ratio/high_mean": 0.0013020833721384406, + "clip_ratio/low_mean": 0.0032900729565881193, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00459215632872656, + "entropy": 0.5106779877096415, + "epoch": 6e-05, + "grad_norm": 1.0145094394683838, + "kl": 0.0013804795053147245, + "learning_rate": 1.4285714285714285e-05, + "loss": -0.045, + "step": 6, + "step_time": 29.551835642994774 + }, + { + "clip_ratio/high_max": 0.0024003623984754086, + "clip_ratio/high_mean": 0.0012001811992377043, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0017210145597346127, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10088.0, + "completions/max_terminated_length": 10088.0, + "completions/mean_length": 8518.21875, + "completions/mean_terminated_length": 8518.21875, + "completions/min_length": 4084.0, + "completions/min_terminated_length": 4084.0, + "entropy": 0.5038529355078936, + "epoch": 7e-05, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5022886991500854, + "kl": 0.002840353590727318, + "learning_rate": 1.7142857142857145e-05, + "loss": -0.0036, + "num_tokens": 1164601.0, + "reward": -0.41255950927734375, + "reward_std": 0.46968239545822144, + "rewards/rollout_eval_reward_func/mean": 0.11216971278190613, + "rewards/rollout_eval_reward_func/std": 0.2204883098602295, + "rewards/rollout_reward_func/mean": -0.41255950927734375, + "rewards/rollout_reward_func/std": 0.5122336149215698, + "sampling/importance_sampling_ratio/max": 1.4158059358596802, + "sampling/importance_sampling_ratio/mean": 1.0018370151519775, + "sampling/importance_sampling_ratio/min": 0.7707551121711731, + "sampling/sampling_logp_difference/max": 0.3476989269256592, + "sampling/sampling_logp_difference/mean": 0.017664402723312378, + "step": 7, + "step_time": 77.99332059699736 + }, + { + "clip_ratio/high_max": 0.005842391517944634, + "clip_ratio/high_mean": 0.0034420291776768863, + "clip_ratio/low_mean": 0.0051097974355798215, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.008551826613256708, + "entropy": 0.5001224614679813, + "epoch": 8e-05, + "grad_norm": 1.3377231359481812, + "kl": 0.006958273006603122, + "learning_rate": 2e-05, + "loss": -0.0079, + "step": 8, + "step_time": 30.119341139003154 + }, + { + "clip_ratio/high_max": 0.0020833334419876337, + "clip_ratio/high_mean": 0.0010416667209938169, + "clip_ratio/low_mean": 0.00046641789958812296, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0015080846205819398, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9987.0, + "completions/max_terminated_length": 9987.0, + "completions/mean_length": 8235.9375, + "completions/mean_terminated_length": 8235.9375, + "completions/min_length": 2028.0, + "completions/min_terminated_length": 2028.0, + "entropy": 0.5665333420038223, + "epoch": 9e-05, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.413293719291687, + "kl": 0.012357485480606556, + "learning_rate": 2.2857142857142858e-05, + "loss": -0.0089, + "num_tokens": 1456974.0, + "reward": -0.2786320447921753, + "reward_std": 0.4699662923812866, + "rewards/rollout_eval_reward_func/mean": 0.12322154641151428, + "rewards/rollout_eval_reward_func/std": 0.23254993557929993, + "rewards/rollout_reward_func/mean": -0.2786320447921753, + "rewards/rollout_reward_func/std": 0.510530948638916, + "sampling/importance_sampling_ratio/max": 1.6322839260101318, + "sampling/importance_sampling_ratio/mean": 0.9981738328933716, + "sampling/importance_sampling_ratio/min": 0.6440463662147522, + "sampling/sampling_logp_difference/max": 0.48998022079467773, + "sampling/sampling_logp_difference/mean": 0.02640429511666298, + "step": 9, + "step_time": 80.34681812299641 + }, + { + "clip_ratio/high_max": 0.028179825632832944, + "clip_ratio/high_mean": 0.01559113833354786, + "clip_ratio/low_mean": 0.01464278216008097, + "clip_ratio/low_min": 0.006223290809430182, + "clip_ratio/region_mean": 0.03023392061004415, + "entropy": 0.5607042815536261, + "epoch": 0.0001, + "grad_norm": 1.2342119216918945, + "kl": 0.03045007959008217, + "learning_rate": 2.5714285714285714e-05, + "loss": -0.0159, + "step": 10, + "step_time": 28.650263912999435 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0005208333604969084, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10076.0, + "completions/max_terminated_length": 10076.0, + "completions/mean_length": 8311.21875, + "completions/mean_terminated_length": 8311.21875, + "completions/min_length": 1530.0, + "completions/min_terminated_length": 1530.0, + "entropy": 0.4887528121471405, + "epoch": 0.00011, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.4407812356948853, + "kl": 0.04280303395353258, + "learning_rate": 2.857142857142857e-05, + "loss": -0.0508, + "num_tokens": 1751757.0, + "reward": -0.26280224323272705, + "reward_std": 0.4824950098991394, + "rewards/rollout_eval_reward_func/mean": 0.1091209352016449, + "rewards/rollout_eval_reward_func/std": 0.22141531109809875, + "rewards/rollout_reward_func/mean": -0.26280224323272705, + "rewards/rollout_reward_func/std": 0.4825066328048706, + "sampling/importance_sampling_ratio/max": 2.2060391902923584, + "sampling/importance_sampling_ratio/mean": 1.003042221069336, + "sampling/importance_sampling_ratio/min": 0.505047619342804, + "sampling/sampling_logp_difference/max": 0.79119873046875, + "sampling/sampling_logp_difference/mean": 0.03998423367738724, + "step": 11, + "step_time": 81.20211481799561 + }, + { + "clip_ratio/high_max": 0.031166458851657808, + "clip_ratio/high_mean": 0.01714572956552729, + "clip_ratio/low_mean": 0.018567851395346224, + "clip_ratio/low_min": 0.005885701393708587, + "clip_ratio/region_mean": 0.0357135811354965, + "entropy": 0.47410433553159237, + "epoch": 0.00012, + "grad_norm": 1.048365831375122, + "kl": 0.08051084214821458, + "learning_rate": 3.142857142857143e-05, + "loss": -0.0558, + "step": 12, + "step_time": 29.28374841400546 + }, + { + "clip_ratio/high_max": 0.001953125, + "clip_ratio/high_mean": 0.0009765625, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0009765625, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10209.0, + "completions/max_terminated_length": 10209.0, + "completions/mean_length": 8161.71875, + "completions/mean_terminated_length": 8161.71875, + "completions/min_length": 1827.0, + "completions/min_terminated_length": 1827.0, + "entropy": 0.43679925985634327, + "epoch": 0.00013, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.0560696125030518, + "kl": 0.09263498219661415, + "learning_rate": 3.428571428571429e-05, + "loss": 0.1042, + "num_tokens": 2042327.0, + "reward": -0.02590048871934414, + "reward_std": 0.6161512732505798, + "rewards/rollout_eval_reward_func/mean": 0.16006097197532654, + "rewards/rollout_eval_reward_func/std": 0.2864827811717987, + "rewards/rollout_reward_func/mean": -0.02590048871934414, + "rewards/rollout_reward_func/std": 0.6041470170021057, + "sampling/importance_sampling_ratio/max": 2.7582640647888184, + "sampling/importance_sampling_ratio/mean": 0.9981331825256348, + "sampling/importance_sampling_ratio/min": 0.361401230096817, + "sampling/sampling_logp_difference/max": 1.0177664756774902, + "sampling/sampling_logp_difference/mean": 0.06089622899889946, + "step": 13, + "step_time": 85.01218143400365 + }, + { + "clip_ratio/high_max": 0.012486383900977671, + "clip_ratio/high_mean": 0.007805692031979561, + "clip_ratio/low_mean": 0.030729168094694614, + "clip_ratio/low_min": 0.015625000465661287, + "clip_ratio/region_mean": 0.038534860184881836, + "entropy": 0.41658624820411205, + "epoch": 0.00014, + "grad_norm": 1.044942855834961, + "kl": 0.16313170175999403, + "learning_rate": 3.7142857142857143e-05, + "loss": 0.1002, + "step": 14, + "step_time": 28.990433916003894 + }, + { + "clip_ratio/high_max": 0.00596590933855623, + "clip_ratio/high_mean": 0.0035037880297750235, + "clip_ratio/low_mean": 0.0005122950533404946, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.004016083083115518, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10134.0, + "completions/max_terminated_length": 10134.0, + "completions/mean_length": 8323.34375, + "completions/mean_terminated_length": 8323.34375, + "completions/min_length": 1934.0, + "completions/min_terminated_length": 1934.0, + "entropy": 0.44160761684179306, + "epoch": 0.00015, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.2544862031936646, + "kl": 0.21658248733729124, + "learning_rate": 4e-05, + "loss": 0.1199, + "num_tokens": 2337711.0, + "reward": -0.0776321142911911, + "reward_std": 0.5812347531318665, + "rewards/rollout_eval_reward_func/mean": 0.14151422679424286, + "rewards/rollout_eval_reward_func/std": 0.2538794279098511, + "rewards/rollout_reward_func/mean": -0.0776321142911911, + "rewards/rollout_reward_func/std": 0.5845968723297119, + "sampling/importance_sampling_ratio/max": 1.8725090026855469, + "sampling/importance_sampling_ratio/mean": 0.9912927150726318, + "sampling/importance_sampling_ratio/min": 0.1565917581319809, + "sampling/sampling_logp_difference/max": 1.8541131019592285, + "sampling/sampling_logp_difference/mean": 0.06762713938951492, + "step": 15, + "step_time": 87.63701662399762 + }, + { + "clip_ratio/high_max": 0.033285985700786114, + "clip_ratio/high_mean": 0.02006899402476847, + "clip_ratio/low_mean": 0.017902423918712884, + "clip_ratio/low_min": 0.008303140406496823, + "clip_ratio/region_mean": 0.03797141805989668, + "entropy": 0.43832515366375446, + "epoch": 0.00016, + "grad_norm": 1.1862040758132935, + "kl": 0.2433762801811099, + "learning_rate": 4.2857142857142856e-05, + "loss": 0.1137, + "step": 16, + "step_time": 30.26940473900322 + }, + { + "clip_ratio/high_max": 0.005208333604969084, + "clip_ratio/high_mean": 0.002604166802484542, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0031250001629814506, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10099.0, + "completions/max_terminated_length": 10099.0, + "completions/mean_length": 8931.625, + "completions/mean_terminated_length": 8931.625, + "completions/min_length": 2013.0, + "completions/min_terminated_length": 2013.0, + "entropy": 0.4058182891458273, + "epoch": 0.00017, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.206827998161316, + "kl": 0.1886005294509232, + "learning_rate": 4.5714285714285716e-05, + "loss": -0.0944, + "num_tokens": 2652765.0, + "reward": -0.0752231553196907, + "reward_std": 0.48041343688964844, + "rewards/rollout_eval_reward_func/mean": 0.10861280560493469, + "rewards/rollout_eval_reward_func/std": 0.2368263602256775, + "rewards/rollout_reward_func/mean": -0.0752231553196907, + "rewards/rollout_reward_func/std": 0.5091694593429565, + "sampling/importance_sampling_ratio/max": 2.2689177989959717, + "sampling/importance_sampling_ratio/mean": 1.0046234130859375, + "sampling/importance_sampling_ratio/min": 0.1846628040075302, + "sampling/sampling_logp_difference/max": 1.6892237663269043, + "sampling/sampling_logp_difference/mean": 0.06120520830154419, + "step": 17, + "step_time": 96.5394253049999 + }, + { + "clip_ratio/high_max": 0.0221070961561054, + "clip_ratio/high_mean": 0.013136881520040333, + "clip_ratio/low_mean": 0.005389189289417118, + "clip_ratio/low_min": 0.002066256827674806, + "clip_ratio/region_mean": 0.01852607080945745, + "entropy": 0.40752917528152466, + "epoch": 0.00018, + "grad_norm": 1.039859652519226, + "kl": 0.20007089478895068, + "learning_rate": 4.8571428571428576e-05, + "loss": -0.1064, + "step": 18, + "step_time": 29.607819763001316 + }, + { + "clip_ratio/high_max": 0.00424107164144516, + "clip_ratio/high_mean": 0.00212053582072258, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.00212053582072258, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9868.0, + "completions/max_terminated_length": 9868.0, + "completions/mean_length": 7739.625, + "completions/mean_terminated_length": 7739.625, + "completions/min_length": 1494.0, + "completions/min_terminated_length": 1494.0, + "entropy": 0.3824189379811287, + "epoch": 0.00019, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1822205781936646, + "kl": 0.1706448094919324, + "learning_rate": 5.142857142857143e-05, + "loss": -0.1187, + "num_tokens": 2929452.0, + "reward": 0.1796756088733673, + "reward_std": 0.6716787815093994, + "rewards/rollout_eval_reward_func/mean": 0.25978150963783264, + "rewards/rollout_eval_reward_func/std": 0.31619328260421753, + "rewards/rollout_reward_func/mean": 0.1796756088733673, + "rewards/rollout_reward_func/std": 0.6625394821166992, + "sampling/importance_sampling_ratio/max": 1.8655627965927124, + "sampling/importance_sampling_ratio/mean": 1.0000479221343994, + "sampling/importance_sampling_ratio/min": 0.33482789993286133, + "sampling/sampling_logp_difference/max": 1.0941386222839355, + "sampling/sampling_logp_difference/mean": 0.04819408059120178, + "step": 19, + "step_time": 92.65558583299753 + }, + { + "clip_ratio/high_max": 0.030015080701559782, + "clip_ratio/high_mean": 0.018132540630176663, + "clip_ratio/low_mean": 0.03180725604761392, + "clip_ratio/low_min": 0.0052083334885537624, + "clip_ratio/region_mean": 0.049939796910621226, + "entropy": 0.3580914381891489, + "epoch": 0.0002, + "grad_norm": 1.152976155281067, + "kl": 0.2634436935186386, + "learning_rate": 5.428571428571428e-05, + "loss": -0.1272, + "step": 20, + "step_time": 28.27301450500272 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0020833335001952946, + "clip_ratio/low_min": 0.0010416667209938169, + "clip_ratio/region_mean": 0.0020833335001952946, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10426.0, + "completions/max_terminated_length": 10426.0, + "completions/mean_length": 7911.40625, + "completions/mean_terminated_length": 7911.40625, + "completions/min_length": 1040.0, + "completions/min_terminated_length": 1040.0, + "entropy": 0.3455618601292372, + "epoch": 0.00021, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9142677187919617, + "kl": 0.2354841867927462, + "learning_rate": 5.714285714285714e-05, + "loss": -0.0904, + "num_tokens": 3211621.0, + "reward": 0.09562171995639801, + "reward_std": 0.6017146706581116, + "rewards/rollout_eval_reward_func/mean": 0.1835619956254959, + "rewards/rollout_eval_reward_func/std": 0.2800058424472809, + "rewards/rollout_reward_func/mean": 0.09562171995639801, + "rewards/rollout_reward_func/std": 0.5979344248771667, + "sampling/importance_sampling_ratio/max": 1.7227435111999512, + "sampling/importance_sampling_ratio/mean": 0.9981924295425415, + "sampling/importance_sampling_ratio/min": 0.38243889808654785, + "sampling/sampling_logp_difference/max": 0.961186408996582, + "sampling/sampling_logp_difference/mean": 0.04361895099282265, + "step": 21, + "step_time": 94.40408171299714 + }, + { + "clip_ratio/high_max": 0.03222161578014493, + "clip_ratio/high_mean": 0.0181941413320601, + "clip_ratio/low_mean": 0.02708333428017795, + "clip_ratio/low_min": 0.0062500000931322575, + "clip_ratio/region_mean": 0.04527747584506869, + "entropy": 0.3229655371978879, + "epoch": 0.00022, + "grad_norm": 0.8647798895835876, + "kl": 0.21354854525998235, + "learning_rate": 6e-05, + "loss": -0.1008, + "step": 22, + "step_time": 30.11174104199381 + }, + { + "clip_ratio/high_max": 0.0011160714784637094, + "clip_ratio/high_mean": 0.0005580357392318547, + "clip_ratio/low_mean": 0.0010995370685122907, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0016575728077441454, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10470.0, + "completions/max_terminated_length": 10470.0, + "completions/mean_length": 7568.375, + "completions/mean_terminated_length": 7568.375, + "completions/min_length": 2202.0, + "completions/min_terminated_length": 2202.0, + "entropy": 0.28525836300104856, + "epoch": 0.00023, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.0814907550811768, + "kl": 0.35280791157856584, + "learning_rate": 6.285714285714286e-05, + "loss": 0.016, + "num_tokens": 3482436.0, + "reward": 0.2800288200378418, + "reward_std": 0.7106037139892578, + "rewards/rollout_eval_reward_func/mean": 0.33079269528388977, + "rewards/rollout_eval_reward_func/std": 0.3271085023880005, + "rewards/rollout_reward_func/mean": 0.2800288200378418, + "rewards/rollout_reward_func/std": 0.6996307373046875, + "sampling/importance_sampling_ratio/max": 1.6482936143875122, + "sampling/importance_sampling_ratio/mean": 1.0002542734146118, + "sampling/importance_sampling_ratio/min": 0.2758394777774811, + "sampling/sampling_logp_difference/max": 1.2879362106323242, + "sampling/sampling_logp_difference/mean": 0.0332026481628418, + "step": 23, + "step_time": 93.99063302500326 + }, + { + "clip_ratio/high_max": 0.01396139187272638, + "clip_ratio/high_mean": 0.007690923230256885, + "clip_ratio/low_mean": 0.01880787085974589, + "clip_ratio/low_min": 0.0031250001629814506, + "clip_ratio/region_mean": 0.02649879432283342, + "entropy": 0.2676102966070175, + "epoch": 0.00024, + "grad_norm": 0.8727543354034424, + "kl": 0.3772396189160645, + "learning_rate": 6.571428571428571e-05, + "loss": 0.0057, + "step": 24, + "step_time": 29.4178187339985 + }, + { + "clip_ratio/high_max": 0.004613095428794622, + "clip_ratio/high_mean": 0.002985895553138107, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0035067289136350155, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9569.0, + "completions/max_terminated_length": 9569.0, + "completions/mean_length": 7533.28125, + "completions/mean_terminated_length": 7533.28125, + "completions/min_length": 2449.0, + "completions/min_terminated_length": 2449.0, + "entropy": 0.2505391649901867, + "epoch": 0.00025, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.017386555671692, + "kl": 0.21523633878678083, + "learning_rate": 6.857142857142858e-05, + "loss": 0.0242, + "num_tokens": 3751699.0, + "reward": 0.3911706805229187, + "reward_std": 0.638326108455658, + "rewards/rollout_eval_reward_func/mean": 0.36318597197532654, + "rewards/rollout_eval_reward_func/std": 0.3184514343738556, + "rewards/rollout_reward_func/mean": 0.3911706805229187, + "rewards/rollout_reward_func/std": 0.6562069654464722, + "sampling/importance_sampling_ratio/max": 1.5404945611953735, + "sampling/importance_sampling_ratio/mean": 0.9984301328659058, + "sampling/importance_sampling_ratio/min": 0.4790920615196228, + "sampling/sampling_logp_difference/max": 0.7358624935150146, + "sampling/sampling_logp_difference/mean": 0.025531694293022156, + "step": 25, + "step_time": 92.37763964700025 + }, + { + "clip_ratio/high_max": 0.02074831852223724, + "clip_ratio/high_mean": 0.014075796061661094, + "clip_ratio/low_mean": 0.024038826406467706, + "clip_ratio/low_min": 0.004687500186264515, + "clip_ratio/region_mean": 0.0381146224681288, + "entropy": 0.24146342556923628, + "epoch": 0.00026, + "grad_norm": 1.08539617061615, + "kl": 0.242179695982486, + "learning_rate": 7.142857142857143e-05, + "loss": 0.0152, + "step": 26, + "step_time": 27.09601488199405 + }, + { + "clip_ratio/high_max": 0.004924242617562413, + "clip_ratio/high_mean": 0.0024621213087812066, + "clip_ratio/low_mean": 0.0015625000814907253, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.004024621390271932, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9714.0, + "completions/max_terminated_length": 9714.0, + "completions/mean_length": 7341.125, + "completions/mean_terminated_length": 7341.125, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "entropy": 0.24662253353744745, + "epoch": 0.00027, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.0926475524902344, + "kl": 0.201888975687325, + "learning_rate": 7.428571428571429e-05, + "loss": -0.0651, + "num_tokens": 4014835.0, + "reward": 0.26619410514831543, + "reward_std": 0.6366387009620667, + "rewards/rollout_eval_reward_func/mean": 0.31529471278190613, + "rewards/rollout_eval_reward_func/std": 0.3177616000175476, + "rewards/rollout_reward_func/mean": 0.26619410514831543, + "rewards/rollout_reward_func/std": 0.6645346879959106, + "sampling/importance_sampling_ratio/max": 1.7210402488708496, + "sampling/importance_sampling_ratio/mean": 0.9990845918655396, + "sampling/importance_sampling_ratio/min": 0.46208029985427856, + "sampling/sampling_logp_difference/max": 0.7720166444778442, + "sampling/sampling_logp_difference/mean": 0.024712545797228813, + "step": 27, + "step_time": 90.07543276499928 + }, + { + "clip_ratio/high_max": 0.033208509092219174, + "clip_ratio/high_mean": 0.018557379313278943, + "clip_ratio/low_mean": 0.035281969350762665, + "clip_ratio/low_min": 0.011458333698101342, + "clip_ratio/region_mean": 0.05383934878045693, + "entropy": 0.24193121027201414, + "epoch": 0.00028, + "grad_norm": 0.9876235127449036, + "kl": 0.26401366433128715, + "learning_rate": 7.714285714285715e-05, + "loss": -0.073, + "step": 28, + "step_time": 27.219164144002207 + }, + { + "clip_ratio/high_max": 0.0010775862028822303, + "clip_ratio/high_mean": 0.0005387931014411151, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0005387931014411151, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9855.0, + "completions/max_terminated_length": 9855.0, + "completions/mean_length": 7361.875, + "completions/mean_terminated_length": 7361.875, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "entropy": 0.21860306337475777, + "epoch": 0.00029, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9292377233505249, + "kl": 0.20060417288914323, + "learning_rate": 8e-05, + "loss": -0.0977, + "num_tokens": 4278506.0, + "reward": 0.30670806765556335, + "reward_std": 0.652392566204071, + "rewards/rollout_eval_reward_func/mean": 0.3278709352016449, + "rewards/rollout_eval_reward_func/std": 0.31351709365844727, + "rewards/rollout_reward_func/mean": 0.30670806765556335, + "rewards/rollout_reward_func/std": 0.6815608143806458, + "sampling/importance_sampling_ratio/max": 1.4481010437011719, + "sampling/importance_sampling_ratio/mean": 1.0026426315307617, + "sampling/importance_sampling_ratio/min": 0.5693169832229614, + "sampling/sampling_logp_difference/max": 0.5633178949356079, + "sampling/sampling_logp_difference/mean": 0.01894025132060051, + "step": 29, + "step_time": 88.37378997200358 + }, + { + "clip_ratio/high_max": 0.02580322092399001, + "clip_ratio/high_mean": 0.015042814193293452, + "clip_ratio/low_mean": 0.015608090267051011, + "clip_ratio/low_min": 0.0020833334419876337, + "clip_ratio/region_mean": 0.030650904460344464, + "entropy": 0.2232473948970437, + "epoch": 0.0003, + "grad_norm": 0.6086679697036743, + "kl": 0.19415233470499516, + "learning_rate": 8.285714285714287e-05, + "loss": -0.1081, + "step": 30, + "step_time": 28.619991764000588 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0005208333604969084, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10726.0, + "completions/max_terminated_length": 10726.0, + "completions/mean_length": 7164.65625, + "completions/mean_terminated_length": 7164.65625, + "completions/min_length": 470.0, + "completions/min_terminated_length": 470.0, + "entropy": 0.23761425912380219, + "epoch": 0.00031, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.050552487373352, + "kl": 0.25638002483174205, + "learning_rate": 8.571428571428571e-05, + "loss": 0.012, + "num_tokens": 4536097.0, + "reward": 0.3345087766647339, + "reward_std": 0.5485296249389648, + "rewards/rollout_eval_reward_func/mean": 0.3090701103210449, + "rewards/rollout_eval_reward_func/std": 0.32714226841926575, + "rewards/rollout_reward_func/mean": 0.3345087766647339, + "rewards/rollout_reward_func/std": 0.6012357473373413, + "sampling/importance_sampling_ratio/max": 1.438549518585205, + "sampling/importance_sampling_ratio/mean": 1.0011037588119507, + "sampling/importance_sampling_ratio/min": 0.6349728107452393, + "sampling/sampling_logp_difference/max": 0.45417308807373047, + "sampling/sampling_logp_difference/mean": 0.015337169170379639, + "step": 31, + "step_time": 92.49027231299806 + }, + { + "clip_ratio/high_max": 0.03391559107694775, + "clip_ratio/high_mean": 0.018867517996113747, + "clip_ratio/low_mean": 0.044338769221212715, + "clip_ratio/low_min": 0.008333333535119891, + "clip_ratio/region_mean": 0.06320628756657243, + "entropy": 0.22916866652667522, + "epoch": 0.00032, + "grad_norm": 1.028586745262146, + "kl": 0.3105860697105527, + "learning_rate": 8.857142857142857e-05, + "loss": 0.0055, + "step": 32, + "step_time": 29.399824877003994 + }, + { + "clip_ratio/high_max": 0.0024519230937585235, + "clip_ratio/high_mean": 0.0012259615468792617, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0012259615468792617, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10128.0, + "completions/max_terminated_length": 10128.0, + "completions/mean_length": 7357.46875, + "completions/mean_terminated_length": 7357.46875, + "completions/min_length": 1917.0, + "completions/min_terminated_length": 1917.0, + "entropy": 0.2557551637291908, + "epoch": 0.00033, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9717881083488464, + "kl": 0.2046954189427197, + "learning_rate": 9.142857142857143e-05, + "loss": 0.0245, + "num_tokens": 4799621.0, + "reward": 0.35216301679611206, + "reward_std": 0.6164546608924866, + "rewards/rollout_eval_reward_func/mean": 0.3365091383457184, + "rewards/rollout_eval_reward_func/std": 0.3354848027229309, + "rewards/rollout_reward_func/mean": 0.35216301679611206, + "rewards/rollout_reward_func/std": 0.6309141516685486, + "sampling/importance_sampling_ratio/max": 1.333243727684021, + "sampling/importance_sampling_ratio/mean": 1.0005223751068115, + "sampling/importance_sampling_ratio/min": 0.7339702248573303, + "sampling/sampling_logp_difference/max": 0.30928683280944824, + "sampling/sampling_logp_difference/mean": 0.014704002998769283, + "step": 33, + "step_time": 89.53553034700417 + }, + { + "clip_ratio/high_max": 0.01991061063017696, + "clip_ratio/high_mean": 0.011966300604399294, + "clip_ratio/low_mean": 0.02272569522028789, + "clip_ratio/low_min": 0.009722222457639873, + "clip_ratio/region_mean": 0.03469199570827186, + "entropy": 0.2428069869056344, + "epoch": 0.00034, + "grad_norm": 0.685612142086029, + "kl": 0.2513351505622268, + "learning_rate": 9.428571428571429e-05, + "loss": 0.0129, + "step": 34, + "step_time": 28.25809028400181 + }, + { + "clip_ratio/high_max": 0.0021990741370245814, + "clip_ratio/high_mean": 0.0010995370685122907, + "clip_ratio/low_mean": 0.0010416667209938169, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0021412037895061076, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9665.0, + "completions/max_terminated_length": 9665.0, + "completions/mean_length": 8000.09375, + "completions/mean_terminated_length": 8000.09375, + "completions/min_length": 4295.0, + "completions/min_terminated_length": 4295.0, + "entropy": 0.2354184165596962, + "epoch": 0.00035, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.040405035018921, + "kl": 0.1770010399632156, + "learning_rate": 9.714285714285715e-05, + "loss": 0.1517, + "num_tokens": 5084103.0, + "reward": 0.3385156989097595, + "reward_std": 0.5189785957336426, + "rewards/rollout_eval_reward_func/mean": 0.23996442556381226, + "rewards/rollout_eval_reward_func/std": 0.31991085410118103, + "rewards/rollout_reward_func/mean": 0.3385156989097595, + "rewards/rollout_reward_func/std": 0.5693588852882385, + "sampling/importance_sampling_ratio/max": 1.4071576595306396, + "sampling/importance_sampling_ratio/mean": 0.9996304512023926, + "sampling/importance_sampling_ratio/min": 0.5387703776359558, + "sampling/sampling_logp_difference/max": 0.6184659004211426, + "sampling/sampling_logp_difference/mean": 0.015029089525341988, + "step": 35, + "step_time": 95.05921310200392 + }, + { + "clip_ratio/high_max": 0.026263557723723352, + "clip_ratio/high_mean": 0.014173445466440171, + "clip_ratio/low_mean": 0.02787990286014974, + "clip_ratio/low_min": 0.007291667046956718, + "clip_ratio/region_mean": 0.04205334832658991, + "entropy": 0.21858789399266243, + "epoch": 0.00036, + "grad_norm": 1.0455042123794556, + "kl": 0.2051441869698465, + "learning_rate": 0.0001, + "loss": 0.1403, + "step": 36, + "step_time": 27.85193802100366 + }, + { + "clip_ratio/high_max": 0.007164939888752997, + "clip_ratio/high_mean": 0.0035824699443764985, + "clip_ratio/low_mean": 0.0015625000232830644, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.005144969967659563, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9514.0, + "completions/max_terminated_length": 9514.0, + "completions/mean_length": 6029.25, + "completions/mean_terminated_length": 6029.25, + "completions/min_length": 1061.0, + "completions/min_terminated_length": 1061.0, + "entropy": 0.21716525312513113, + "epoch": 0.00037, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.8604521751403809, + "kl": 0.23097522975876927, + "learning_rate": 9.999736485702831e-05, + "loss": -0.0709, + "num_tokens": 5305345.0, + "reward": 0.41453179717063904, + "reward_std": 0.7797224521636963, + "rewards/rollout_eval_reward_func/mean": 0.4568089246749878, + "rewards/rollout_eval_reward_func/std": 0.28734299540519714, + "rewards/rollout_reward_func/mean": 0.41453179717063904, + "rewards/rollout_reward_func/std": 0.755694568157196, + "sampling/importance_sampling_ratio/max": 1.4738141298294067, + "sampling/importance_sampling_ratio/mean": 1.000828742980957, + "sampling/importance_sampling_ratio/min": 0.7324953079223633, + "sampling/sampling_logp_difference/max": 0.3878536820411682, + "sampling/sampling_logp_difference/mean": 0.013184964656829834, + "step": 37, + "step_time": 76.87407001600332 + }, + { + "clip_ratio/high_max": 0.04774210066534579, + "clip_ratio/high_mean": 0.02752261853311211, + "clip_ratio/low_mean": 0.03158482233993709, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.059107440523803234, + "entropy": 0.21499714627861977, + "epoch": 0.00038, + "grad_norm": 1.026845932006836, + "kl": 0.3676267918199301, + "learning_rate": 9.998945979845876e-05, + "loss": -0.0694, + "step": 38, + "step_time": 27.58343887600313 + }, + { + "clip_ratio/high_max": 0.006285919691435993, + "clip_ratio/high_mean": 0.0031429598457179964, + "clip_ratio/low_mean": 0.0010416667209938169, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.004184626566711813, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9353.0, + "completions/max_terminated_length": 9353.0, + "completions/mean_length": 6221.78125, + "completions/mean_terminated_length": 6221.78125, + "completions/min_length": 1175.0, + "completions/min_terminated_length": 1175.0, + "entropy": 0.21314978785812855, + "epoch": 0.00039, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1063776016235352, + "kl": 0.28443425707519054, + "learning_rate": 9.997628593527586e-05, + "loss": 0.1657, + "num_tokens": 5533203.0, + "reward": 0.5931290984153748, + "reward_std": 0.5068180561065674, + "rewards/rollout_eval_reward_func/mean": 0.4369918704032898, + "rewards/rollout_eval_reward_func/std": 0.2919425666332245, + "rewards/rollout_reward_func/mean": 0.5931290984153748, + "rewards/rollout_reward_func/std": 0.6152276396751404, + "sampling/importance_sampling_ratio/max": 1.4768017530441284, + "sampling/importance_sampling_ratio/mean": 0.9989122152328491, + "sampling/importance_sampling_ratio/min": 0.7442160248756409, + "sampling/sampling_logp_difference/max": 0.3898787498474121, + "sampling/sampling_logp_difference/mean": 0.011076296679675579, + "step": 39, + "step_time": 80.26773473300273 + }, + { + "clip_ratio/high_max": 0.03581550612580031, + "clip_ratio/high_mean": 0.021467003040015697, + "clip_ratio/low_mean": 0.019476010755170137, + "clip_ratio/low_min": 0.0031250000465661287, + "clip_ratio/region_mean": 0.04094301396980882, + "entropy": 0.2001811731606722, + "epoch": 0.0004, + "grad_norm": 0.8571550250053406, + "kl": 0.39517259504646063, + "learning_rate": 9.995784511894694e-05, + "loss": 0.1561, + "step": 40, + "step_time": 26.113719172002675 + }, + { + "clip_ratio/high_max": 0.0027173913549631834, + "clip_ratio/high_mean": 0.0013586956774815917, + "clip_ratio/low_mean": 0.003238224715460092, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0045969203929416835, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9876.0, + "completions/max_terminated_length": 9876.0, + "completions/mean_length": 7216.5625, + "completions/mean_terminated_length": 7216.5625, + "completions/min_length": 1879.0, + "completions/min_terminated_length": 1879.0, + "entropy": 0.2681358586996794, + "epoch": 0.00041, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3309671878814697, + "kl": 0.2410402470268309, + "learning_rate": 9.993413994116206e-05, + "loss": 0.1903, + "num_tokens": 5792478.0, + "reward": 0.471214234828949, + "reward_std": 0.5625734329223633, + "rewards/rollout_eval_reward_func/mean": 0.3643292784690857, + "rewards/rollout_eval_reward_func/std": 0.34053289890289307, + "rewards/rollout_reward_func/mean": 0.471214234828949, + "rewards/rollout_reward_func/std": 0.6072424650192261, + "sampling/importance_sampling_ratio/max": 1.8356192111968994, + "sampling/importance_sampling_ratio/mean": 1.0007987022399902, + "sampling/importance_sampling_ratio/min": 0.4829617738723755, + "sampling/sampling_logp_difference/max": 0.7278177738189697, + "sampling/sampling_logp_difference/mean": 0.014709306880831718, + "step": 41, + "step_time": 87.47009326799707 + }, + { + "clip_ratio/high_max": 0.034506134572438896, + "clip_ratio/high_mean": 0.01836913888109848, + "clip_ratio/low_mean": 0.03956068912521005, + "clip_ratio/low_min": 0.012500000651925802, + "clip_ratio/region_mean": 0.05792982783168554, + "entropy": 0.27205855678766966, + "epoch": 0.00042, + "grad_norm": 1.0188957452774048, + "kl": 0.30527770798653364, + "learning_rate": 9.990517373346957e-05, + "loss": 0.1841, + "step": 42, + "step_time": 27.952364619004584 + }, + { + "clip_ratio/high_max": 0.005300949211232364, + "clip_ratio/high_mean": 0.002650474605616182, + "clip_ratio/low_mean": 0.0015625000814907253, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.004212974687106907, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10094.0, + "completions/max_terminated_length": 10094.0, + "completions/mean_length": 6369.84375, + "completions/mean_terminated_length": 6369.84375, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "entropy": 0.24548510648310184, + "epoch": 0.00043, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.8371849656105042, + "kl": 0.22607734380289912, + "learning_rate": 9.98709505668081e-05, + "loss": -0.1383, + "num_tokens": 6024570.0, + "reward": 0.5083565711975098, + "reward_std": 0.7129669785499573, + "rewards/rollout_eval_reward_func/mean": 0.4181910753250122, + "rewards/rollout_eval_reward_func/std": 0.3106958866119385, + "rewards/rollout_reward_func/mean": 0.5083565711975098, + "rewards/rollout_reward_func/std": 0.679851770401001, + "sampling/importance_sampling_ratio/max": 1.6035348176956177, + "sampling/importance_sampling_ratio/mean": 1.0009479522705078, + "sampling/importance_sampling_ratio/min": 0.7113155722618103, + "sampling/sampling_logp_difference/max": 0.4722104072570801, + "sampling/sampling_logp_difference/mean": 0.010827964171767235, + "step": 43, + "step_time": 81.8608712560017 + }, + { + "clip_ratio/high_max": 0.022805775748565793, + "clip_ratio/high_mean": 0.01218413794413209, + "clip_ratio/low_mean": 0.026488096278626472, + "clip_ratio/low_min": 0.0020833334419876337, + "clip_ratio/region_mean": 0.03867223463021219, + "entropy": 0.2484031356871128, + "epoch": 0.00044, + "grad_norm": 0.6352972388267517, + "kl": 0.24903920874930918, + "learning_rate": 9.983147525093428e-05, + "loss": -0.1456, + "step": 44, + "step_time": 28.312056484001005 + }, + { + "clip_ratio/high_max": 0.0020833334419876337, + "clip_ratio/high_mean": 0.0010416667209938169, + "clip_ratio/low_mean": 0.0010416667209938169, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0020833334419876337, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10030.0, + "completions/max_terminated_length": 10030.0, + "completions/mean_length": 7470.40625, + "completions/mean_terminated_length": 7470.40625, + "completions/min_length": 3212.0, + "completions/min_terminated_length": 3212.0, + "entropy": 0.26859680097550154, + "epoch": 0.00045, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9950742125511169, + "kl": 0.2715269709005952, + "learning_rate": 9.978675333374685e-05, + "loss": 0.1354, + "num_tokens": 6292193.0, + "reward": 0.31536591053009033, + "reward_std": 0.626213550567627, + "rewards/rollout_eval_reward_func/mean": 0.2950965166091919, + "rewards/rollout_eval_reward_func/std": 0.3288768529891968, + "rewards/rollout_reward_func/mean": 0.31536591053009033, + "rewards/rollout_reward_func/std": 0.6272794604301453, + "sampling/importance_sampling_ratio/max": 1.2761257886886597, + "sampling/importance_sampling_ratio/mean": 0.9995177388191223, + "sampling/importance_sampling_ratio/min": 0.6398259401321411, + "sampling/sampling_logp_difference/max": 0.44655919075012207, + "sampling/sampling_logp_difference/mean": 0.01289924792945385, + "step": 45, + "step_time": 89.98842330299703 + }, + { + "clip_ratio/high_max": 0.029475471819750965, + "clip_ratio/high_mean": 0.017039196158293635, + "clip_ratio/low_mean": 0.035884891636669636, + "clip_ratio/low_min": 0.014583333861082792, + "clip_ratio/region_mean": 0.05292408773675561, + "entropy": 0.25596251618117094, + "epoch": 0.00046, + "grad_norm": 1.0492225885391235, + "kl": 0.4555607410147786, + "learning_rate": 9.973679110050689e-05, + "loss": 0.1236, + "step": 46, + "step_time": 28.10059149600238 + }, + { + "clip_ratio/high_max": 0.005558473523706198, + "clip_ratio/high_mean": 0.002779236761853099, + "clip_ratio/low_mean": 0.0031250001629814506, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.005904236924834549, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10171.0, + "completions/max_terminated_length": 10171.0, + "completions/mean_length": 7720.34375, + "completions/mean_terminated_length": 7720.34375, + "completions/min_length": 2255.0, + "completions/min_terminated_length": 2255.0, + "entropy": 0.21848125476390123, + "epoch": 0.00047, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9580699801445007, + "kl": 0.2126072864048183, + "learning_rate": 9.968159557295458e-05, + "loss": 0.2391, + "num_tokens": 6567972.0, + "reward": 0.585047721862793, + "reward_std": 0.4849390387535095, + "rewards/rollout_eval_reward_func/mean": 0.35200709104537964, + "rewards/rollout_eval_reward_func/std": 0.33855971693992615, + "rewards/rollout_reward_func/mean": 0.585047721862793, + "rewards/rollout_reward_func/std": 0.4694308936595917, + "sampling/importance_sampling_ratio/max": 1.3900582790374756, + "sampling/importance_sampling_ratio/mean": 1.0005149841308594, + "sampling/importance_sampling_ratio/min": 0.5463369488716125, + "sampling/sampling_logp_difference/max": 0.6045193672180176, + "sampling/sampling_logp_difference/mean": 0.012745920568704605, + "step": 47, + "step_time": 91.15270540599704 + }, + { + "clip_ratio/high_max": 0.03133936191443354, + "clip_ratio/high_mean": 0.017232181096915156, + "clip_ratio/low_mean": 0.04218750132713467, + "clip_ratio/low_min": 0.01145833358168602, + "clip_ratio/region_mean": 0.059419682365842164, + "entropy": 0.23045554850250483, + "epoch": 0.00048, + "grad_norm": 1.2474925518035889, + "kl": 0.18294932693243027, + "learning_rate": 9.962117450832225e-05, + "loss": 0.238, + "step": 48, + "step_time": 29.046616760999314 + }, + { + "clip_ratio/high_max": 0.006842764443717897, + "clip_ratio/high_mean": 0.0034213822218589485, + "clip_ratio/low_mean": 0.0015625000232830644, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.004983882245142013, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10017.0, + "completions/max_terminated_length": 10017.0, + "completions/mean_length": 7918.40625, + "completions/mean_terminated_length": 7918.40625, + "completions/min_length": 1876.0, + "completions/min_terminated_length": 1876.0, + "entropy": 0.24847039952874184, + "epoch": 0.00049, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1504110097885132, + "kl": 0.3213502997532487, + "learning_rate": 9.955553639824423e-05, + "loss": 0.1906, + "num_tokens": 6849638.0, + "reward": 0.39189645648002625, + "reward_std": 0.5209037065505981, + "rewards/rollout_eval_reward_func/mean": 0.2815040647983551, + "rewards/rollout_eval_reward_func/std": 0.332853227853775, + "rewards/rollout_reward_func/mean": 0.39189645648002625, + "rewards/rollout_reward_func/std": 0.5881980061531067, + "sampling/importance_sampling_ratio/max": 1.4030216932296753, + "sampling/importance_sampling_ratio/mean": 0.9992052316665649, + "sampling/importance_sampling_ratio/min": 0.6490213871002197, + "sampling/sampling_logp_difference/max": 0.43228960037231445, + "sampling/sampling_logp_difference/mean": 0.011766092851758003, + "step": 49, + "step_time": 91.98302743600289 + }, + { + "clip_ratio/high_max": 0.030021664802916348, + "clip_ratio/high_mean": 0.01896916568512097, + "clip_ratio/low_mean": 0.02840909146470949, + "clip_ratio/low_min": 0.0031250001629814506, + "clip_ratio/region_mean": 0.04737825732445344, + "entropy": 0.22083801217377186, + "epoch": 0.0005, + "grad_norm": 1.493245005607605, + "kl": 0.6161252139136195, + "learning_rate": 9.948469046756344e-05, + "loss": 0.1882, + "step": 50, + "step_time": 29.706524382998396 + }, + { + "clip_ratio/high_max": 0.007615459966473281, + "clip_ratio/high_mean": 0.0038077299832366407, + "clip_ratio/low_mean": 0.0010416667209938169, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0048493967042304575, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10057.0, + "completions/max_terminated_length": 10057.0, + "completions/mean_length": 7061.03125, + "completions/mean_terminated_length": 7061.03125, + "completions/min_length": 2525.0, + "completions/min_terminated_length": 2525.0, + "entropy": 0.24380221962928772, + "epoch": 0.00051, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1951247453689575, + "kl": 0.28687382210046053, + "learning_rate": 9.940864667303489e-05, + "loss": 0.1425, + "num_tokens": 7103728.0, + "reward": 0.406146377325058, + "reward_std": 0.6755715608596802, + "rewards/rollout_eval_reward_func/mean": 0.3859247863292694, + "rewards/rollout_eval_reward_func/std": 0.33643871545791626, + "rewards/rollout_reward_func/mean": 0.406146377325058, + "rewards/rollout_reward_func/std": 0.6774359345436096, + "sampling/importance_sampling_ratio/max": 1.367674469947815, + "sampling/importance_sampling_ratio/mean": 0.9991032481193542, + "sampling/importance_sampling_ratio/min": 0.6542518734931946, + "sampling/sampling_logp_difference/max": 0.4242628812789917, + "sampling/sampling_logp_difference/mean": 0.012621527537703514, + "step": 51, + "step_time": 85.52853098199739 + }, + { + "clip_ratio/high_max": 0.023708798456937075, + "clip_ratio/high_mean": 0.015155438333749771, + "clip_ratio/low_mean": 0.02644535672152415, + "clip_ratio/low_min": 0.009695513173937798, + "clip_ratio/region_mean": 0.04160079546272755, + "entropy": 0.24589570611715317, + "epoch": 0.00052, + "grad_norm": 0.6901561617851257, + "kl": 0.2809536149725318, + "learning_rate": 9.932741570192633e-05, + "loss": 0.1278, + "step": 52, + "step_time": 28.923457664002854 + }, + { + "clip_ratio/high_max": 0.0011160714784637094, + "clip_ratio/high_mean": 0.0005580357392318547, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.001078869099728763, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10169.0, + "completions/max_terminated_length": 10169.0, + "completions/mean_length": 7814.28125, + "completions/mean_terminated_length": 7814.28125, + "completions/min_length": 1989.0, + "completions/min_terminated_length": 1989.0, + "entropy": 0.21275948453694582, + "epoch": 0.00053, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.7513790130615234, + "kl": 0.23512054793536663, + "learning_rate": 9.924100897051629e-05, + "loss": 0.1945, + "num_tokens": 7382261.0, + "reward": 0.42753756046295166, + "reward_std": 0.49785035848617554, + "rewards/rollout_eval_reward_func/mean": 0.26969003677368164, + "rewards/rollout_eval_reward_func/std": 0.3341839611530304, + "rewards/rollout_reward_func/mean": 0.42753756046295166, + "rewards/rollout_reward_func/std": 0.49307680130004883, + "sampling/importance_sampling_ratio/max": 1.3325144052505493, + "sampling/importance_sampling_ratio/mean": 0.9995752573013306, + "sampling/importance_sampling_ratio/min": 0.6147154569625854, + "sampling/sampling_logp_difference/max": 0.48659586906433105, + "sampling/sampling_logp_difference/mean": 0.010477245785295963, + "step": 53, + "step_time": 89.77598898800352 + }, + { + "clip_ratio/high_max": 0.014756215270608664, + "clip_ratio/high_mean": 0.007378107635304332, + "clip_ratio/low_mean": 0.026041667733807117, + "clip_ratio/low_min": 0.008333333651535213, + "clip_ratio/region_mean": 0.03341977560194209, + "entropy": 0.20128578413277864, + "epoch": 0.00054, + "grad_norm": 0.570249080657959, + "kl": 0.24723996873944998, + "learning_rate": 9.914943862248966e-05, + "loss": 0.1836, + "step": 54, + "step_time": 28.66781206799169 + }, + { + "clip_ratio/high_max": 0.005409664008766413, + "clip_ratio/high_mean": 0.0027048320043832064, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.003225665364880115, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9612.0, + "completions/max_terminated_length": 9612.0, + "completions/mean_length": 7466.40625, + "completions/mean_terminated_length": 7466.40625, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "entropy": 0.2242852784693241, + "epoch": 0.00055, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.7776551246643066, + "kl": 0.22325131320394576, + "learning_rate": 9.905271752723088e-05, + "loss": 0.0206, + "num_tokens": 7648812.0, + "reward": 0.40199464559555054, + "reward_std": 0.5598407983779907, + "rewards/rollout_eval_reward_func/mean": 0.3375253975391388, + "rewards/rollout_eval_reward_func/std": 0.351279616355896, + "rewards/rollout_reward_func/mean": 0.40199464559555054, + "rewards/rollout_reward_func/std": 0.5975609421730042, + "sampling/importance_sampling_ratio/max": 1.317135214805603, + "sampling/importance_sampling_ratio/mean": 0.9976714849472046, + "sampling/importance_sampling_ratio/min": 0.6417545676231384, + "sampling/sampling_logp_difference/max": 0.4435492753982544, + "sampling/sampling_logp_difference/mean": 0.012365585193037987, + "step": 55, + "step_time": 90.48158546899867 + }, + { + "clip_ratio/high_max": 0.02967093954794109, + "clip_ratio/high_mean": 0.01639796979725361, + "clip_ratio/low_mean": 0.017361111822538078, + "clip_ratio/low_min": 0.0031250001629814506, + "clip_ratio/region_mean": 0.03375908185262233, + "entropy": 0.2281673550605774, + "epoch": 0.00056, + "grad_norm": 0.48372626304626465, + "kl": 0.23605143558233976, + "learning_rate": 9.895085927801542e-05, + "loss": 0.0086, + "step": 56, + "step_time": 27.291444884000157 + }, + { + "clip_ratio/high_max": 0.003557769814506173, + "clip_ratio/high_mean": 0.0017788849072530866, + "clip_ratio/low_mean": 0.0015625000814907253, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.003341384930536151, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10322.0, + "completions/max_terminated_length": 10322.0, + "completions/mean_length": 6645.3125, + "completions/mean_terminated_length": 6645.3125, + "completions/min_length": 1995.0, + "completions/min_terminated_length": 1995.0, + "entropy": 0.22339679207652807, + "epoch": 0.00057, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.7947802543640137, + "kl": 0.33472199738025665, + "learning_rate": 9.884387819009922e-05, + "loss": 0.0286, + "num_tokens": 7889241.0, + "reward": 0.3272937536239624, + "reward_std": 0.7356898784637451, + "rewards/rollout_eval_reward_func/mean": 0.38528963923454285, + "rewards/rollout_eval_reward_func/std": 0.3158987760543823, + "rewards/rollout_reward_func/mean": 0.3272937536239624, + "rewards/rollout_reward_func/std": 0.7287615537643433, + "sampling/importance_sampling_ratio/max": 1.519856333732605, + "sampling/importance_sampling_ratio/mean": 1.0008394718170166, + "sampling/importance_sampling_ratio/min": 0.6888355612754822, + "sampling/sampling_logp_difference/max": 0.41861581802368164, + "sampling/sampling_logp_difference/mean": 0.01188460923731327, + "step": 57, + "step_time": 83.6079965079989 + }, + { + "clip_ratio/high_max": 0.02337649872060865, + "clip_ratio/high_mean": 0.012729916197713464, + "clip_ratio/low_mean": 0.03550771565642208, + "clip_ratio/low_min": 0.013886852888390422, + "clip_ratio/region_mean": 0.048237632028758526, + "entropy": 0.23247116059064865, + "epoch": 0.00058, + "grad_norm": 0.6895915269851685, + "kl": 0.30278117302805185, + "learning_rate": 9.873178929870695e-05, + "loss": 0.0178, + "step": 58, + "step_time": 29.01562165299947 + }, + { + "clip_ratio/high_max": 0.006458333344198763, + "clip_ratio/high_mean": 0.00375000003259629, + "clip_ratio/low_mean": 0.0010416667209938169, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.004791666753590107, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10121.0, + "completions/max_terminated_length": 10121.0, + "completions/mean_length": 7354.9375, + "completions/mean_terminated_length": 7354.9375, + "completions/min_length": 1114.0, + "completions/min_terminated_length": 1114.0, + "entropy": 0.2855970785021782, + "epoch": 0.00059, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1581138372421265, + "kl": 0.30998079385608435, + "learning_rate": 9.86146083569188e-05, + "loss": -0.077, + "num_tokens": 8152533.0, + "reward": 0.12930195033550262, + "reward_std": 0.6660091876983643, + "rewards/rollout_eval_reward_func/mean": 0.33841466903686523, + "rewards/rollout_eval_reward_func/std": 0.3268774151802063, + "rewards/rollout_reward_func/mean": 0.12930195033550262, + "rewards/rollout_reward_func/std": 0.7711123824119568, + "sampling/importance_sampling_ratio/max": 1.4381568431854248, + "sampling/importance_sampling_ratio/mean": 0.9980136156082153, + "sampling/importance_sampling_ratio/min": 0.7020198106765747, + "sampling/sampling_logp_difference/max": 0.36336231231689453, + "sampling/sampling_logp_difference/mean": 0.016959059983491898, + "step": 59, + "step_time": 87.87077508199764 + }, + { + "clip_ratio/high_max": 0.048061754438094795, + "clip_ratio/high_mean": 0.031483913655392826, + "clip_ratio/low_mean": 0.04418836906552315, + "clip_ratio/low_min": 0.007291666814126074, + "clip_ratio/region_mean": 0.07567228260450065, + "entropy": 0.26963882334530354, + "epoch": 0.0006, + "grad_norm": 1.0022964477539062, + "kl": 0.30027929320931435, + "learning_rate": 9.84923518334567e-05, + "loss": -0.0828, + "step": 60, + "step_time": 28.71259851099967 + }, + { + "clip_ratio/high_max": 0.01005121401976794, + "clip_ratio/high_mean": 0.005546440428588539, + "clip_ratio/low_mean": 0.0020026409183628857, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.007549081346951425, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10379.0, + "completions/max_terminated_length": 10379.0, + "completions/mean_length": 6955.875, + "completions/mean_terminated_length": 6955.875, + "completions/min_length": 2081.0, + "completions/min_terminated_length": 2081.0, + "entropy": 0.23451983137056231, + "epoch": 0.00061, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1702102422714233, + "kl": 0.28609952982515097, + "learning_rate": 9.83650369103696e-05, + "loss": 0.0631, + "num_tokens": 8403186.0, + "reward": 0.3940112888813019, + "reward_std": 0.67255699634552, + "rewards/rollout_eval_reward_func/mean": 0.3715701103210449, + "rewards/rollout_eval_reward_func/std": 0.3261474072933197, + "rewards/rollout_reward_func/mean": 0.3940112888813019, + "rewards/rollout_reward_func/std": 0.6762000322341919, + "sampling/importance_sampling_ratio/max": 1.3093942403793335, + "sampling/importance_sampling_ratio/mean": 1.0008020401000977, + "sampling/importance_sampling_ratio/min": 0.5961512923240662, + "sampling/sampling_logp_difference/max": 0.5172607898712158, + "sampling/sampling_logp_difference/mean": 0.014042183756828308, + "step": 61, + "step_time": 86.51144317899707 + }, + { + "clip_ratio/high_max": 0.051156656933017075, + "clip_ratio/high_mean": 0.03779221937293187, + "clip_ratio/low_mean": 0.05211732583120465, + "clip_ratio/low_min": 0.024354460649192333, + "clip_ratio/region_mean": 0.08990954549517483, + "entropy": 0.21548824943602085, + "epoch": 0.00062, + "grad_norm": 1.1680642366409302, + "kl": 0.5453370595350862, + "learning_rate": 9.823268148061883e-05, + "loss": 0.0666, + "step": 62, + "step_time": 28.28093677799916 + }, + { + "clip_ratio/high_max": 0.009642903693020344, + "clip_ratio/high_mean": 0.004821451846510172, + "clip_ratio/low_mean": 0.0010416667209938169, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.005863118567503989, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9611.0, + "completions/max_terminated_length": 9611.0, + "completions/mean_length": 5474.5, + "completions/mean_terminated_length": 5474.5, + "completions/min_length": 1264.0, + "completions/min_terminated_length": 1264.0, + "entropy": 0.20994199626147747, + "epoch": 0.00063, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5162609815597534, + "kl": 0.3376044826582074, + "learning_rate": 9.809530414556335e-05, + "loss": 0.1386, + "num_tokens": 8606212.0, + "reward": 0.6158473491668701, + "reward_std": 0.705132782459259, + "rewards/rollout_eval_reward_func/mean": 0.5119410753250122, + "rewards/rollout_eval_reward_func/std": 0.2654803693294525, + "rewards/rollout_reward_func/mean": 0.6158473491668701, + "rewards/rollout_reward_func/std": 0.6767383813858032, + "sampling/importance_sampling_ratio/max": 1.9123412370681763, + "sampling/importance_sampling_ratio/mean": 0.9994137287139893, + "sampling/importance_sampling_ratio/min": 0.6006231904029846, + "sampling/sampling_logp_difference/max": 0.6483283042907715, + "sampling/sampling_logp_difference/mean": 0.015111252665519714, + "step": 63, + "step_time": 74.64896667399807 + }, + { + "clip_ratio/high_max": 0.05132549628615379, + "clip_ratio/high_mean": 0.030718339723534882, + "clip_ratio/low_mean": 0.028882576967589557, + "clip_ratio/low_min": 0.0031250000465661287, + "clip_ratio/region_mean": 0.05960091657470912, + "entropy": 0.20065013086423278, + "epoch": 0.00064, + "grad_norm": 1.244667649269104, + "kl": 0.453593029640615, + "learning_rate": 9.79529242123455e-05, + "loss": 0.1234, + "step": 64, + "step_time": 24.8986287849948 + }, + { + "clip_ratio/high_max": 0.0077537596225738525, + "clip_ratio/high_mean": 0.0038768798112869263, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.004397713171783835, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10299.0, + "completions/max_terminated_length": 10299.0, + "completions/mean_length": 6500.03125, + "completions/mean_terminated_length": 6500.03125, + "completions/min_length": 1712.0, + "completions/min_terminated_length": 1712.0, + "entropy": 0.14482268318533897, + "epoch": 0.00065, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9947667121887207, + "kl": 0.2627185983583331, + "learning_rate": 9.780556169117757e-05, + "loss": 0.0665, + "num_tokens": 8841902.0, + "reward": 0.6678237915039062, + "reward_std": 0.5866862535476685, + "rewards/rollout_eval_reward_func/mean": 0.5013973712921143, + "rewards/rollout_eval_reward_func/std": 0.27832266688346863, + "rewards/rollout_reward_func/mean": 0.6678237915039062, + "rewards/rollout_reward_func/std": 0.5921808481216431, + "sampling/importance_sampling_ratio/max": 1.4597694873809814, + "sampling/importance_sampling_ratio/mean": 0.99915611743927, + "sampling/importance_sampling_ratio/min": 0.27695003151893616, + "sampling/sampling_logp_difference/max": 1.2839181423187256, + "sampling/sampling_logp_difference/mean": 0.010844534263014793, + "step": 65, + "step_time": 80.86000475000401 + }, + { + "clip_ratio/high_max": 0.025044884881936014, + "clip_ratio/high_mean": 0.014345359115395695, + "clip_ratio/low_mean": 0.02013494382845238, + "clip_ratio/low_min": 0.0020833334419876337, + "clip_ratio/region_mean": 0.03448030271101743, + "entropy": 0.13131517032161355, + "epoch": 0.00066, + "grad_norm": 0.4750834107398987, + "kl": 0.34219094878062606, + "learning_rate": 9.765323729252955e-05, + "loss": 0.0561, + "step": 66, + "step_time": 28.661124781996477 + }, + { + "clip_ratio/high_max": 0.009476827806793153, + "clip_ratio/high_mean": 0.0062825315981172025, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0062825315981172025, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10269.0, + "completions/max_terminated_length": 10269.0, + "completions/mean_length": 6501.5, + "completions/mean_terminated_length": 6501.5, + "completions/min_length": 724.0, + "completions/min_terminated_length": 724.0, + "entropy": 0.14845013478770852, + "epoch": 0.00067, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9048762917518616, + "kl": 0.3337597157806158, + "learning_rate": 9.749597242421838e-05, + "loss": 0.0677, + "num_tokens": 9077833.0, + "reward": 0.6164548397064209, + "reward_std": 0.5683386325836182, + "rewards/rollout_eval_reward_func/mean": 0.4744664430618286, + "rewards/rollout_eval_reward_func/std": 0.29613611102104187, + "rewards/rollout_reward_func/mean": 0.6164548397064209, + "rewards/rollout_reward_func/std": 0.6236394643783569, + "sampling/importance_sampling_ratio/max": 1.9927904605865479, + "sampling/importance_sampling_ratio/mean": 1.0013047456741333, + "sampling/importance_sampling_ratio/min": 0.5228504538536072, + "sampling/sampling_logp_difference/max": 0.6895358562469482, + "sampling/sampling_logp_difference/mean": 0.011342051438987255, + "step": 67, + "step_time": 79.70687562199964 + }, + { + "clip_ratio/high_max": 0.0376884457655251, + "clip_ratio/high_mean": 0.026128767582122236, + "clip_ratio/low_mean": 0.026416301843710244, + "clip_ratio/low_min": 0.007291666814126074, + "clip_ratio/region_mean": 0.0525450695422478, + "entropy": 0.15412914380431175, + "epoch": 0.00068, + "grad_norm": 0.8491650223731995, + "kl": 0.3899666126817465, + "learning_rate": 9.733378918839942e-05, + "loss": 0.0638, + "step": 68, + "step_time": 27.40538086699962 + }, + { + "clip_ratio/high_max": 0.006514550419524312, + "clip_ratio/high_mean": 0.003257275209762156, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.003257275209762156, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10202.0, + "completions/max_terminated_length": 10202.0, + "completions/mean_length": 5860.65625, + "completions/mean_terminated_length": 5860.65625, + "completions/min_length": 540.0, + "completions/min_terminated_length": 540.0, + "entropy": 0.16269859950989485, + "epoch": 0.00069, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.9143983721733093, + "kl": 0.47777214366942644, + "learning_rate": 9.716671037846007e-05, + "loss": 0.1152, + "num_tokens": 9293397.0, + "reward": 0.6956244707107544, + "reward_std": 0.5506021976470947, + "rewards/rollout_eval_reward_func/mean": 0.5125762224197388, + "rewards/rollout_eval_reward_func/std": 0.2857610881328583, + "rewards/rollout_reward_func/mean": 0.6956244707107544, + "rewards/rollout_reward_func/std": 0.5588130354881287, + "sampling/importance_sampling_ratio/max": 1.4407436847686768, + "sampling/importance_sampling_ratio/mean": 1.0004699230194092, + "sampling/importance_sampling_ratio/min": 0.5672728419303894, + "sampling/sampling_logp_difference/max": 0.5669147968292236, + "sampling/sampling_logp_difference/mean": 0.010705020278692245, + "step": 69, + "step_time": 77.93740563500614 + }, + { + "clip_ratio/high_max": 0.04634982522111386, + "clip_ratio/high_mean": 0.029493737209122628, + "clip_ratio/low_mean": 0.01730769290588796, + "clip_ratio/low_min": 0.004166666767559946, + "clip_ratio/region_mean": 0.046801429823972285, + "entropy": 0.1784980888478458, + "epoch": 0.0007, + "grad_norm": 0.7063129544258118, + "kl": 0.3514184970408678, + "learning_rate": 9.699475947581644e-05, + "loss": 0.1049, + "step": 70, + "step_time": 27.06573885999751 + }, + { + "clip_ratio/high_max": 0.0018382353009656072, + "clip_ratio/high_mean": 0.0009191176504828036, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0009191176504828036, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10593.0, + "completions/max_terminated_length": 10593.0, + "completions/mean_length": 6225.28125, + "completions/mean_terminated_length": 6225.28125, + "completions/min_length": 1544.0, + "completions/min_terminated_length": 1544.0, + "entropy": 0.17604797054082155, + "epoch": 0.00071, + "frac_reward_zero_std": 0.25, + "grad_norm": 0.7393732070922852, + "kl": 0.20170354284346104, + "learning_rate": 9.681796064661319e-05, + "loss": 0.0413, + "num_tokens": 9520372.0, + "reward": 0.8103519678115845, + "reward_std": 0.3980957269668579, + "rewards/rollout_eval_reward_func/mean": 0.5907012224197388, + "rewards/rollout_eval_reward_func/std": 0.20860876142978668, + "rewards/rollout_reward_func/mean": 0.8103519678115845, + "rewards/rollout_reward_func/std": 0.49828964471817017, + "sampling/importance_sampling_ratio/max": 1.5257266759872437, + "sampling/importance_sampling_ratio/mean": 0.9991195201873779, + "sampling/importance_sampling_ratio/min": 0.6470949649810791, + "sampling/sampling_logp_difference/max": 0.43526220321655273, + "sampling/sampling_logp_difference/mean": 0.010150602087378502, + "step": 71, + "step_time": 79.67722541299918 + }, + { + "clip_ratio/high_max": 0.01454339677002281, + "clip_ratio/high_mean": 0.008729609136935323, + "clip_ratio/low_mean": 0.009114583488553762, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.01784419280011207, + "entropy": 0.18162205442786217, + "epoch": 0.00072, + "grad_norm": 0.44046881794929504, + "kl": 0.20182663016021252, + "learning_rate": 9.663633873832725e-05, + "loss": 0.0328, + "step": 72, + "step_time": 28.538212690000364 + }, + { + "clip_ratio/high_max": 0.004232634324580431, + "clip_ratio/high_mean": 0.0021163171622902155, + "clip_ratio/low_mean": 0.0005208333604969084, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.002637150522787124, + "completions/clipped_ratio": 0.0, + "completions/max_length": 10142.0, + "completions/max_terminated_length": 10142.0, + "completions/mean_length": 6840.59375, + "completions/mean_terminated_length": 6840.59375, + "completions/min_length": 1013.0, + "completions/min_terminated_length": 1013.0, + "entropy": 0.20604060776531696, + "epoch": 0.00073, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.6912176609039307, + "kl": 0.26183000626042485, + "learning_rate": 9.644991927627566e-05, + "loss": -0.0088, + "num_tokens": 9767000.0, + "reward": 0.7756590247154236, + "reward_std": 0.5361872911453247, + "rewards/rollout_eval_reward_func/mean": 0.5909552574157715, + "rewards/rollout_eval_reward_func/std": 0.2465948760509491, + "rewards/rollout_reward_func/mean": 0.7756590247154236, + "rewards/rollout_reward_func/std": 0.5321318507194519, + "sampling/importance_sampling_ratio/max": 1.2645851373672485, + "sampling/importance_sampling_ratio/mean": 1.0009121894836426, + "sampling/importance_sampling_ratio/min": 0.6386132836341858, + "sampling/sampling_logp_difference/max": 0.4484562873840332, + "sampling/sampling_logp_difference/mean": 0.010102368891239166, + "step": 73, + "step_time": 82.14820753000458 + }, + { + "clip_ratio/high_max": 0.02557993505615741, + "clip_ratio/high_mean": 0.01748599053826183, + "clip_ratio/low_mean": 0.009895833674818277, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.027381824154872447, + "entropy": 0.22169003915041685, + "epoch": 0.00074, + "grad_norm": 0.42521047592163086, + "kl": 0.23322301171720028, + "learning_rate": 9.625872846002834e-05, + "loss": -0.0155, + "step": 74, + "step_time": 28.134478513999056 + }, + { + "clip_ratio/high_max": 0.008986742584966123, + "clip_ratio/high_mean": 0.005014204594772309, + "clip_ratio/low_mean": 0.002018229220993817, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.007032433815766126, + "completions/clipped_ratio": 0.0, + "completions/max_length": 9857.0, + "completions/max_terminated_length": 9857.0, + "completions/mean_length": 7195.375, + "completions/mean_terminated_length": 7195.375, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "entropy": 0.27833056077361107, + "epoch": 0.00075, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.8185970187187195, + "kl": 0.24367811996489763, + "learning_rate": 9.606279315972582e-05, + "loss": -0.1492, + "num_tokens": 10025024.0, + "reward": 0.2978099584579468, + "reward_std": 0.6597497463226318, + "rewards/rollout_eval_reward_func/mean": 0.32901421189308167, + "rewards/rollout_eval_reward_func/std": 0.3157320022583008, + "rewards/rollout_reward_func/mean": 0.2978099584579468, + "rewards/rollout_reward_func/std": 0.690675675868988, + "sampling/importance_sampling_ratio/max": 1.4156017303466797, + "sampling/importance_sampling_ratio/mean": 1.0000808238983154, + "sampling/importance_sampling_ratio/min": 0.6558278799057007, + "sampling/sampling_logp_difference/max": 0.4218568801879883, + "sampling/sampling_logp_difference/mean": 0.013327672146260738, + "step": 75, + "step_time": 88.96669697499601 + }, + { + "epoch": 0.00075, + "eval_clip_ratio/high_max": 0.0, + "eval_clip_ratio/high_mean": 0.0, + "eval_clip_ratio/low_mean": 0.0, + "eval_clip_ratio/low_min": 0.0, + "eval_clip_ratio/region_mean": 0.0, + "eval_completions/clipped_ratio": 0.0, + "eval_completions/max_length": 9194.0, + "eval_completions/max_terminated_length": 9194.0, + "eval_completions/mean_length": 7026.0375, + "eval_completions/mean_terminated_length": 7026.0375, + "eval_completions/min_length": 4333.95, + "eval_completions/min_terminated_length": 4333.95, + "eval_entropy": 0.3085056647658348, + "eval_frac_reward_zero_std": 1.0, + "eval_kl": 0.22236853390932082, + "eval_loss": 0.0002063037100015208, + "eval_num_tokens": 10025024.0, + "eval_reward": 0.35444250535219907, + "eval_reward_std": 0.0, + "eval_rewards/rollout_eval_reward_func/mean": 0.3484247986227274, + "eval_rewards/rollout_eval_reward_func/std": 0.26531881298869847, + "eval_rewards/rollout_reward_func/mean": 0.35444250535219907, + "eval_rewards/rollout_reward_func/std": 0.5791118375957012, + "eval_runtime": 161.4965, + "eval_samples_per_second": 0.062, + "eval_sampling/importance_sampling_ratio/max": 1.1964155852794647, + "eval_sampling/importance_sampling_ratio/mean": 1.0003154128789902, + "eval_sampling/importance_sampling_ratio/min": 0.7968822807073593, + "eval_sampling/sampling_logp_difference/max": 0.2617991387844086, + "eval_sampling/sampling_logp_difference/mean": 0.01210988024249673, + "eval_steps_per_second": 0.019, + "step": 75 + } + ], + "logging_steps": 1.0, + "max_steps": 300, + "num_input_tokens_seen": 10025024, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..22a07b1 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:258bdc828cf0aea84d4105d1bc4961ef5cad4760f620dfe20a527a870df319c1 +size 8145