commit 92d21beda9f07d852c2e0c5206d895e814baf4ed
Author: ModelHub XC <noreply@modelhub.org.cn>
Date:   Wed May 6 13:37:50 2026 +0800

    初始化项目，由ModelHub XC社区提供模型
    
    Model: bimabk/test_gin_rummy_qwen_2-5_3B
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..52373fe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e645074
--- /dev/null
+++ b/README.md
@@ -0,0 +1,209 @@
+---
+base_model: unsloth/Llama-3.2-3B-Instruct
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:/cache/models/unsloth--Llama-3.2-3B-Instruct
+- grpo
+- lora
+- transformers
+- trl
+---
+
+# Model Card for bimabk/test_gin_rummy_qwen_2-5_3B
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.1
\ No newline at end of file
diff --git a/adapter_config.json b/adapter_config.json
new file mode 100644
index 0000000..a16376b
--- /dev/null
+++ b/adapter_config.json
@@ -0,0 +1,46 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj",
+    "gate_proj",
+    "q_proj",
+    "up_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/adapter_model.safetensors b/adapter_model.safetensors
new file mode 100644
index 0000000..46b774e
--- /dev/null
+++ b/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25013bdff5adbe26a3cd0e1f5b322cab8ae89bbfa208e14f7ab1d8964aa87b7f
+size 194563400
diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000..1bad6a0
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,93 @@
+{#- Renders the conversation as <|start_header_id|>role<|end_header_id|> turns, each closed by <|eot_id|>, with optional tool definitions. #}{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- if strftime_now is defined %}
+        {%- set date_string = strftime_now("%d %b %Y") %}
+    {%- else %}
+        {%- set date_string = "26 Jul 2024" %}
+    {%- endif %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- If the first message has the 'system' role, take its trimmed content as system_message and drop it from messages; otherwise system_message is the empty string. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "" %}
+{%- endif %}
+
+{#- System turn, always emitted: the ipython environment line when tools are set, the fixed knowledge-cutoff line, today's date, the tool list when tools_in_user_message is false, then system_message. #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if tools is not none %}
+    {{- "Environment: ipython\n" }}
+{%- endif %}
+{{- "Cutting Knowledge Date: December 2023\n" }}
+{{- "Today Date: " + date_string + "\n\n" }}
+{%- if tools is not none and not tools_in_user_message %}
+    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+
+{#- When tools are set and tools_in_user_message is true, the calling instructions and tool list are emitted in a user turn, ahead of the first remaining message's content. #}
+{%- if tools_in_user_message and not tools is none %}
+    {#- Take the first remaining message (its role is not checked) so its trimmed content can follow the tool list; raise when no message is left. #}
+    {%- if messages | length != 0 %}
+        {%- set first_user_message = messages[0]['content']|trim %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+{%- endif %}
+    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+    {{- "Given the following functions, please respond with a JSON for a function call " }}
+    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {%- if not message.tool_calls|length == 1 %}
+            {{- raise_exception("This model only supports single tool-calls at once!") }}
+        {%- endif %}
+        {%- set tool_call = message.tool_calls[0].function %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+        {{- '{"name": "' + tool_call.name + '", ' }}
+        {{- '"parameters": ' }}
+        {{- tool_call.arguments | tojson }}
+        {{- "}" }}
+        {{- "<|eot_id|>" }}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is mapping or message.content is iterable %}
+            {{- message.content | tojson }}
+        {%- else %}
+            {{- message.content }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..bbf879a
--- /dev/null
+++ b/config.json
@@ -0,0 +1,37 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "dtype": "bfloat16",
+  "eos_token_id": 128009,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 3072,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 24,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "pad_token_id": 128004,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.57.5",
+  "unsloth_fixed": true,
+  "use_cache": true,
+  "vocab_size": 128256
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..1fe1bd6
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,5 @@
+{
+  "temperature": null,
+  "top_p": null,
+  "transformers_version": "4.57.5"
+}
diff --git a/loss.txt b/loss.txt
new file mode 100644
index 0000000..2ccabfc
--- /dev/null
+++ b/loss.txt
@@ -0,0 +1 @@
+75,-0.3484247986227274
\ No newline at end of file
diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors
new file mode 100644
index 0000000..d6896ed
--- /dev/null
+++ b/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:136a080bca0bb7437e6ef58d47a7f5bbe54217a2fb386207ff9b3eb2d72f6cc0
+size 4965799096
diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors
new file mode 100644
index 0000000..c8ded1e
--- /dev/null
+++ b/model-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b770216613ac5c34d7c54bdff1fa616bc4e338a9d0b20af6303e48c295ee23c
+size 1459729952
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000..f84d97f
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,262 @@
+{
+  "metadata": {
+    "total_parameters": 3212749824,
+    "total_size": 6425499648
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.norm.weight": "model-00002-of-00002.safetensors"
+  }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..3c1d049
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|eot_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|finetune_right_pad_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..1c1d8d5
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
+size 17209920
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..eccf822
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,2066 @@
+{
+  "add_bos_token": true,
+  "added_tokens_decoder": {
+    "128000": {
+      "content": "<|begin_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128001": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128002": {
+      "content": "<|reserved_special_token_0|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128003": {
+      "content": "<|reserved_special_token_1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128004": {
+      "content": "<|finetune_right_pad_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128005": {
+      "content": "<|reserved_special_token_2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128006": {
+      "content": "<|start_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128007": {
+      "content": "<|end_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128008": {
+      "content": "<|eom_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128009": {
+      "content": "<|eot_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128010": {
+      "content": "<|python_tag|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128011": {
+      "content": "<|reserved_special_token_3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128012": {
+      "content": "<|reserved_special_token_4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128013": {
+      "content": "<|reserved_special_token_5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128014": {
+      "content": "<|reserved_special_token_6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128015": {
+      "content": "<|reserved_special_token_7|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128016": {
+      "content": "<|reserved_special_token_8|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128017": {
+      "content": "<|reserved_special_token_9|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128018": {
+      "content": "<|reserved_special_token_10|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128019": {
+      "content": "<|reserved_special_token_11|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128020": {
+      "content": "<|reserved_special_token_12|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128021": {
+      "content": "<|reserved_special_token_13|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128022": {
+      "content": "<|reserved_special_token_14|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128023": {
+      "content": "<|reserved_special_token_15|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128024": {
+      "content": "<|reserved_special_token_16|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128025": {
+      "content": "<|reserved_special_token_17|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128026": {
+      "content": "<|reserved_special_token_18|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128027": {
+      "content": "<|reserved_special_token_19|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128028": {
+      "content": "<|reserved_special_token_20|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128029": {
+      "content": "<|reserved_special_token_21|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128030": {
+      "content": "<|reserved_special_token_22|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128031": {
+      "content": "<|reserved_special_token_23|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128032": {
+      "content": "<|reserved_special_token_24|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128033": {
+      "content": "<|reserved_special_token_25|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128034": {
+      "content": "<|reserved_special_token_26|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128035": {
+      "content": "<|reserved_special_token_27|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128036": {
+      "content": "<|reserved_special_token_28|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128037": {
+      "content": "<|reserved_special_token_29|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128038": {
+      "content": "<|reserved_special_token_30|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128039": {
+      "content": "<|reserved_special_token_31|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128040": {
+      "content": "<|reserved_special_token_32|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128041": {
+      "content": "<|reserved_special_token_33|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128042": {
+      "content": "<|reserved_special_token_34|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128043": {
+      "content": "<|reserved_special_token_35|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128044": {
+      "content": "<|reserved_special_token_36|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128045": {
+      "content": "<|reserved_special_token_37|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128046": {
+      "content": "<|reserved_special_token_38|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128047": {
+      "content": "<|reserved_special_token_39|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128048": {
+      "content": "<|reserved_special_token_40|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128049": {
+      "content": "<|reserved_special_token_41|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128050": {
+      "content": "<|reserved_special_token_42|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128051": {
+      "content": "<|reserved_special_token_43|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128052": {
+      "content": "<|reserved_special_token_44|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128053": {
+      "content": "<|reserved_special_token_45|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128054": {
+      "content": "<|reserved_special_token_46|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128055": {
+      "content": "<|reserved_special_token_47|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128056": {
+      "content": "<|reserved_special_token_48|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128057": {
+      "content": "<|reserved_special_token_49|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128058": {
+      "content": "<|reserved_special_token_50|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128059": {
+      "content": "<|reserved_special_token_51|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128060": {
+      "content": "<|reserved_special_token_52|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128061": {
+      "content": "<|reserved_special_token_53|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128062": {
+      "content": "<|reserved_special_token_54|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128063": {
+      "content": "<|reserved_special_token_55|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128064": {
+      "content": "<|reserved_special_token_56|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128065": {
+      "content": "<|reserved_special_token_57|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128066": {
+      "content": "<|reserved_special_token_58|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128067": {
+      "content": "<|reserved_special_token_59|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128068": {
+      "content": "<|reserved_special_token_60|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128069": {
+      "content": "<|reserved_special_token_61|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128070": {
+      "content": "<|reserved_special_token_62|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128071": {
+      "content": "<|reserved_special_token_63|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128072": {
+      "content": "<|reserved_special_token_64|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128073": {
+      "content": "<|reserved_special_token_65|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128074": {
+      "content": "<|reserved_special_token_66|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128075": {
+      "content": "<|reserved_special_token_67|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128076": {
+      "content": "<|reserved_special_token_68|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128077": {
+      "content": "<|reserved_special_token_69|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128078": {
+      "content": "<|reserved_special_token_70|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128079": {
+      "content": "<|reserved_special_token_71|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128080": {
+      "content": "<|reserved_special_token_72|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128081": {
+      "content": "<|reserved_special_token_73|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128082": {
+      "content": "<|reserved_special_token_74|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128083": {
+      "content": "<|reserved_special_token_75|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128084": {
+      "content": "<|reserved_special_token_76|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128085": {
+      "content": "<|reserved_special_token_77|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128086": {
+      "content": "<|reserved_special_token_78|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128087": {
+      "content": "<|reserved_special_token_79|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128088": {
+      "content": "<|reserved_special_token_80|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128089": {
+      "content": "<|reserved_special_token_81|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128090": {
+      "content": "<|reserved_special_token_82|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128091": {
+      "content": "<|reserved_special_token_83|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128092": {
+      "content": "<|reserved_special_token_84|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128093": {
+      "content": "<|reserved_special_token_85|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128094": {
+      "content": "<|reserved_special_token_86|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128095": {
+      "content": "<|reserved_special_token_87|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128096": {
+      "content": "<|reserved_special_token_88|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128097": {
+      "content": "<|reserved_special_token_89|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128098": {
+      "content": "<|reserved_special_token_90|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128099": {
+      "content": "<|reserved_special_token_91|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128100": {
+      "content": "<|reserved_special_token_92|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128101": {
+      "content": "<|reserved_special_token_93|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128102": {
+      "content": "<|reserved_special_token_94|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128103": {
+      "content": "<|reserved_special_token_95|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128104": {
+      "content": "<|reserved_special_token_96|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128105": {
+      "content": "<|reserved_special_token_97|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128106": {
+      "content": "<|reserved_special_token_98|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128107": {
+      "content": "<|reserved_special_token_99|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128108": {
+      "content": "<|reserved_special_token_100|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128109": {
+      "content": "<|reserved_special_token_101|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128110": {
+      "content": "<|reserved_special_token_102|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128111": {
+      "content": "<|reserved_special_token_103|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128112": {
+      "content": "<|reserved_special_token_104|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128113": {
+      "content": "<|reserved_special_token_105|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128114": {
+      "content": "<|reserved_special_token_106|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128115": {
+      "content": "<|reserved_special_token_107|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128116": {
+      "content": "<|reserved_special_token_108|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128117": {
+      "content": "<|reserved_special_token_109|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128118": {
+      "content": "<|reserved_special_token_110|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128119": {
+      "content": "<|reserved_special_token_111|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128120": {
+      "content": "<|reserved_special_token_112|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128121": {
+      "content": "<|reserved_special_token_113|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128122": {
+      "content": "<|reserved_special_token_114|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128123": {
+      "content": "<|reserved_special_token_115|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128124": {
+      "content": "<|reserved_special_token_116|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128125": {
+      "content": "<|reserved_special_token_117|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128126": {
+      "content": "<|reserved_special_token_118|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128127": {
+      "content": "<|reserved_special_token_119|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128128": {
+      "content": "<|reserved_special_token_120|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128129": {
+      "content": "<|reserved_special_token_121|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128130": {
+      "content": "<|reserved_special_token_122|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128131": {
+      "content": "<|reserved_special_token_123|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128132": {
+      "content": "<|reserved_special_token_124|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128133": {
+      "content": "<|reserved_special_token_125|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128134": {
+      "content": "<|reserved_special_token_126|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128135": {
+      "content": "<|reserved_special_token_127|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128136": {
+      "content": "<|reserved_special_token_128|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128137": {
+      "content": "<|reserved_special_token_129|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128138": {
+      "content": "<|reserved_special_token_130|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128139": {
+      "content": "<|reserved_special_token_131|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128140": {
+      "content": "<|reserved_special_token_132|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128141": {
+      "content": "<|reserved_special_token_133|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128142": {
+      "content": "<|reserved_special_token_134|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128143": {
+      "content": "<|reserved_special_token_135|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128144": {
+      "content": "<|reserved_special_token_136|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128145": {
+      "content": "<|reserved_special_token_137|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128146": {
+      "content": "<|reserved_special_token_138|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128147": {
+      "content": "<|reserved_special_token_139|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128148": {
+      "content": "<|reserved_special_token_140|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128149": {
+      "content": "<|reserved_special_token_141|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128150": {
+      "content": "<|reserved_special_token_142|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128151": {
+      "content": "<|reserved_special_token_143|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128152": {
+      "content": "<|reserved_special_token_144|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128153": {
+      "content": "<|reserved_special_token_145|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128154": {
+      "content": "<|reserved_special_token_146|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128155": {
+      "content": "<|reserved_special_token_147|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128156": {
+      "content": "<|reserved_special_token_148|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128157": {
+      "content": "<|reserved_special_token_149|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128158": {
+      "content": "<|reserved_special_token_150|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128159": {
+      "content": "<|reserved_special_token_151|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128160": {
+      "content": "<|reserved_special_token_152|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128161": {
+      "content": "<|reserved_special_token_153|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128162": {
+      "content": "<|reserved_special_token_154|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128163": {
+      "content": "<|reserved_special_token_155|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128164": {
+      "content": "<|reserved_special_token_156|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128165": {
+      "content": "<|reserved_special_token_157|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128166": {
+      "content": "<|reserved_special_token_158|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128167": {
+      "content": "<|reserved_special_token_159|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128168": {
+      "content": "<|reserved_special_token_160|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128169": {
+      "content": "<|reserved_special_token_161|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128170": {
+      "content": "<|reserved_special_token_162|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128171": {
+      "content": "<|reserved_special_token_163|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128172": {
+      "content": "<|reserved_special_token_164|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128173": {
+      "content": "<|reserved_special_token_165|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128174": {
+      "content": "<|reserved_special_token_166|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128175": {
+      "content": "<|reserved_special_token_167|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128176": {
+      "content": "<|reserved_special_token_168|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128177": {
+      "content": "<|reserved_special_token_169|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128178": {
+      "content": "<|reserved_special_token_170|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128179": {
+      "content": "<|reserved_special_token_171|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128180": {
+      "content": "<|reserved_special_token_172|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128181": {
+      "content": "<|reserved_special_token_173|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128182": {
+      "content": "<|reserved_special_token_174|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128183": {
+      "content": "<|reserved_special_token_175|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128184": {
+      "content": "<|reserved_special_token_176|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128185": {
+      "content": "<|reserved_special_token_177|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128186": {
+      "content": "<|reserved_special_token_178|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128187": {
+      "content": "<|reserved_special_token_179|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128188": {
+      "content": "<|reserved_special_token_180|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128189": {
+      "content": "<|reserved_special_token_181|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128190": {
+      "content": "<|reserved_special_token_182|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128191": {
+      "content": "<|reserved_special_token_183|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128192": {
+      "content": "<|reserved_special_token_184|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128193": {
+      "content": "<|reserved_special_token_185|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128194": {
+      "content": "<|reserved_special_token_186|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128195": {
+      "content": "<|reserved_special_token_187|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128196": {
+      "content": "<|reserved_special_token_188|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128197": {
+      "content": "<|reserved_special_token_189|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128198": {
+      "content": "<|reserved_special_token_190|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128199": {
+      "content": "<|reserved_special_token_191|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128200": {
+      "content": "<|reserved_special_token_192|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128201": {
+      "content": "<|reserved_special_token_193|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128202": {
+      "content": "<|reserved_special_token_194|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128203": {
+      "content": "<|reserved_special_token_195|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128204": {
+      "content": "<|reserved_special_token_196|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128205": {
+      "content": "<|reserved_special_token_197|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128206": {
+      "content": "<|reserved_special_token_198|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128207": {
+      "content": "<|reserved_special_token_199|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128208": {
+      "content": "<|reserved_special_token_200|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128209": {
+      "content": "<|reserved_special_token_201|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128210": {
+      "content": "<|reserved_special_token_202|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128211": {
+      "content": "<|reserved_special_token_203|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128212": {
+      "content": "<|reserved_special_token_204|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128213": {
+      "content": "<|reserved_special_token_205|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128214": {
+      "content": "<|reserved_special_token_206|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128215": {
+      "content": "<|reserved_special_token_207|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128216": {
+      "content": "<|reserved_special_token_208|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128217": {
+      "content": "<|reserved_special_token_209|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128218": {
+      "content": "<|reserved_special_token_210|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128219": {
+      "content": "<|reserved_special_token_211|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128220": {
+      "content": "<|reserved_special_token_212|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128221": {
+      "content": "<|reserved_special_token_213|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128222": {
+      "content": "<|reserved_special_token_214|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128223": {
+      "content": "<|reserved_special_token_215|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128224": {
+      "content": "<|reserved_special_token_216|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128225": {
+      "content": "<|reserved_special_token_217|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128226": {
+      "content": "<|reserved_special_token_218|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128227": {
+      "content": "<|reserved_special_token_219|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128228": {
+      "content": "<|reserved_special_token_220|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128229": {
+      "content": "<|reserved_special_token_221|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128230": {
+      "content": "<|reserved_special_token_222|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128231": {
+      "content": "<|reserved_special_token_223|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128232": {
+      "content": "<|reserved_special_token_224|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128233": {
+      "content": "<|reserved_special_token_225|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128234": {
+      "content": "<|reserved_special_token_226|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128235": {
+      "content": "<|reserved_special_token_227|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128236": {
+      "content": "<|reserved_special_token_228|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128237": {
+      "content": "<|reserved_special_token_229|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128238": {
+      "content": "<|reserved_special_token_230|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128239": {
+      "content": "<|reserved_special_token_231|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128240": {
+      "content": "<|reserved_special_token_232|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128241": {
+      "content": "<|reserved_special_token_233|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128242": {
+      "content": "<|reserved_special_token_234|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128243": {
+      "content": "<|reserved_special_token_235|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128244": {
+      "content": "<|reserved_special_token_236|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128245": {
+      "content": "<|reserved_special_token_237|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128246": {
+      "content": "<|reserved_special_token_238|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128247": {
+      "content": "<|reserved_special_token_239|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128248": {
+      "content": "<|reserved_special_token_240|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128249": {
+      "content": "<|reserved_special_token_241|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128250": {
+      "content": "<|reserved_special_token_242|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128251": {
+      "content": "<|reserved_special_token_243|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128252": {
+      "content": "<|reserved_special_token_244|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128253": {
+      "content": "<|reserved_special_token_245|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128254": {
+      "content": "<|reserved_special_token_246|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128255": {
+      "content": "<|reserved_special_token_247|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|begin_of_text|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|eot_id|>",
+  "extra_special_tokens": {},
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 131072,
+  "pad_token": "<|finetune_right_pad_id|>",
+  "padding_side": "left",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "unk_token": null
+}
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000..b2bde1a
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,1954 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.00075,
+  "eval_steps": 500,
+  "global_step": 75,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10096.0,
+      "completions/max_terminated_length": 10096.0,
+      "completions/mean_length": 8672.71875,
+      "completions/mean_terminated_length": 8672.71875,
+      "completions/min_length": 3020.0,
+      "completions/min_terminated_length": 3020.0,
+      "entropy": 0.49113161116838455,
+      "epoch": 1e-05,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.241949200630188,
+      "kl": 0.0,
+      "learning_rate": 0.0,
+      "loss": -0.0633,
+      "num_tokens": 306152.0,
+      "reward": -0.4408680200576782,
+      "reward_std": 0.3989785313606262,
+      "rewards/rollout_eval_reward_func/mean": 0.11064532399177551,
+      "rewards/rollout_eval_reward_func/std": 0.21571724116802216,
+      "rewards/rollout_reward_func/mean": -0.4408680200576782,
+      "rewards/rollout_reward_func/std": 0.44763946533203125,
+      "sampling/importance_sampling_ratio/max": 1.2819759845733643,
+      "sampling/importance_sampling_ratio/mean": 0.9992397427558899,
+      "sampling/importance_sampling_ratio/min": 0.7715137004852295,
+      "sampling/sampling_logp_difference/max": 0.2594008445739746,
+      "sampling/sampling_logp_difference/mean": 0.01546277105808258,
+      "step": 1,
+      "step_time": 73.26994180099973
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "entropy": 0.49113161116838455,
+      "epoch": 2e-05,
+      "grad_norm": 1.2400784492492676,
+      "kl": 0.0,
+      "learning_rate": 2.8571428571428573e-06,
+      "loss": -0.0633,
+      "step": 2,
+      "step_time": 30.109230951999052
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0005208333604969084,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10009.0,
+      "completions/max_terminated_length": 10009.0,
+      "completions/mean_length": 7330.1875,
+      "completions/mean_terminated_length": 7330.1875,
+      "completions/min_length": 346.0,
+      "completions/min_terminated_length": 346.0,
+      "entropy": 0.5131296459585428,
+      "epoch": 3e-05,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.102152943611145,
+      "kl": 0.0009028113518070313,
+      "learning_rate": 5.7142857142857145e-06,
+      "loss": -0.2347,
+      "num_tokens": 569740.0,
+      "reward": -0.48799318075180054,
+      "reward_std": 0.5598607063293457,
+      "rewards/rollout_eval_reward_func/mean": 0.22929370403289795,
+      "rewards/rollout_eval_reward_func/std": 0.26715749502182007,
+      "rewards/rollout_reward_func/mean": -0.48799318075180054,
+      "rewards/rollout_reward_func/std": 0.5559459924697876,
+      "sampling/importance_sampling_ratio/max": 1.2627520561218262,
+      "sampling/importance_sampling_ratio/mean": 1.0006182193756104,
+      "sampling/importance_sampling_ratio/min": 0.7627776861190796,
+      "sampling/sampling_logp_difference/max": 0.27078866958618164,
+      "sampling/sampling_logp_difference/mean": 0.014230873435735703,
+      "step": 3,
+      "step_time": 68.85090976999709
+    },
+    {
+      "clip_ratio/high_max": 0.0020833334419876337,
+      "clip_ratio/high_mean": 0.0010416667209938169,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0010416667209938169,
+      "entropy": 0.5151741988956928,
+      "epoch": 4e-05,
+      "grad_norm": 1.0848904848098755,
+      "kl": 0.0004950130587531021,
+      "learning_rate": 8.571428571428573e-06,
+      "loss": -0.2336,
+      "step": 4,
+      "step_time": 28.428488818004553
+    },
+    {
+      "clip_ratio/high_max": 0.0010416667209938169,
+      "clip_ratio/high_mean": 0.0005208333604969084,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0010416667209938169,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10323.0,
+      "completions/max_terminated_length": 10323.0,
+      "completions/mean_length": 8267.125,
+      "completions/mean_terminated_length": 8267.125,
+      "completions/min_length": 1640.0,
+      "completions/min_terminated_length": 1640.0,
+      "entropy": 0.5123504158109426,
+      "epoch": 5e-05,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.1476984024047852,
+      "kl": 0.0007431179510604125,
+      "learning_rate": 1.1428571428571429e-05,
+      "loss": -0.0418,
+      "num_tokens": 862728.0,
+      "reward": -0.46075016260147095,
+      "reward_std": 0.5065791606903076,
+      "rewards/rollout_eval_reward_func/mean": 0.128683939576149,
+      "rewards/rollout_eval_reward_func/std": 0.2396152913570404,
+      "rewards/rollout_reward_func/mean": -0.46075016260147095,
+      "rewards/rollout_reward_func/std": 0.5104123950004578,
+      "sampling/importance_sampling_ratio/max": 1.3248213529586792,
+      "sampling/importance_sampling_ratio/mean": 1.0001360177993774,
+      "sampling/importance_sampling_ratio/min": 0.6914317011833191,
+      "sampling/sampling_logp_difference/max": 0.3689908981323242,
+      "sampling/sampling_logp_difference/mean": 0.016226449981331825,
+      "step": 5,
+      "step_time": 75.37122915000327
+    },
+    {
+      "clip_ratio/high_max": 0.0026041667442768812,
+      "clip_ratio/high_mean": 0.0013020833721384406,
+      "clip_ratio/low_mean": 0.0032900729565881193,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.00459215632872656,
+      "entropy": 0.5106779877096415,
+      "epoch": 6e-05,
+      "grad_norm": 1.0145094394683838,
+      "kl": 0.0013804795053147245,
+      "learning_rate": 1.4285714285714285e-05,
+      "loss": -0.045,
+      "step": 6,
+      "step_time": 29.551835642994774
+    },
+    {
+      "clip_ratio/high_max": 0.0024003623984754086,
+      "clip_ratio/high_mean": 0.0012001811992377043,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0017210145597346127,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10088.0,
+      "completions/max_terminated_length": 10088.0,
+      "completions/mean_length": 8518.21875,
+      "completions/mean_terminated_length": 8518.21875,
+      "completions/min_length": 4084.0,
+      "completions/min_terminated_length": 4084.0,
+      "entropy": 0.5038529355078936,
+      "epoch": 7e-05,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.5022886991500854,
+      "kl": 0.002840353590727318,
+      "learning_rate": 1.7142857142857145e-05,
+      "loss": -0.0036,
+      "num_tokens": 1164601.0,
+      "reward": -0.41255950927734375,
+      "reward_std": 0.46968239545822144,
+      "rewards/rollout_eval_reward_func/mean": 0.11216971278190613,
+      "rewards/rollout_eval_reward_func/std": 0.2204883098602295,
+      "rewards/rollout_reward_func/mean": -0.41255950927734375,
+      "rewards/rollout_reward_func/std": 0.5122336149215698,
+      "sampling/importance_sampling_ratio/max": 1.4158059358596802,
+      "sampling/importance_sampling_ratio/mean": 1.0018370151519775,
+      "sampling/importance_sampling_ratio/min": 0.7707551121711731,
+      "sampling/sampling_logp_difference/max": 0.3476989269256592,
+      "sampling/sampling_logp_difference/mean": 0.017664402723312378,
+      "step": 7,
+      "step_time": 77.99332059699736
+    },
+    {
+      "clip_ratio/high_max": 0.005842391517944634,
+      "clip_ratio/high_mean": 0.0034420291776768863,
+      "clip_ratio/low_mean": 0.0051097974355798215,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.008551826613256708,
+      "entropy": 0.5001224614679813,
+      "epoch": 8e-05,
+      "grad_norm": 1.3377231359481812,
+      "kl": 0.006958273006603122,
+      "learning_rate": 2e-05,
+      "loss": -0.0079,
+      "step": 8,
+      "step_time": 30.119341139003154
+    },
+    {
+      "clip_ratio/high_max": 0.0020833334419876337,
+      "clip_ratio/high_mean": 0.0010416667209938169,
+      "clip_ratio/low_mean": 0.00046641789958812296,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0015080846205819398,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9987.0,
+      "completions/max_terminated_length": 9987.0,
+      "completions/mean_length": 8235.9375,
+      "completions/mean_terminated_length": 8235.9375,
+      "completions/min_length": 2028.0,
+      "completions/min_terminated_length": 2028.0,
+      "entropy": 0.5665333420038223,
+      "epoch": 9e-05,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.413293719291687,
+      "kl": 0.012357485480606556,
+      "learning_rate": 2.2857142857142858e-05,
+      "loss": -0.0089,
+      "num_tokens": 1456974.0,
+      "reward": -0.2786320447921753,
+      "reward_std": 0.4699662923812866,
+      "rewards/rollout_eval_reward_func/mean": 0.12322154641151428,
+      "rewards/rollout_eval_reward_func/std": 0.23254993557929993,
+      "rewards/rollout_reward_func/mean": -0.2786320447921753,
+      "rewards/rollout_reward_func/std": 0.510530948638916,
+      "sampling/importance_sampling_ratio/max": 1.6322839260101318,
+      "sampling/importance_sampling_ratio/mean": 0.9981738328933716,
+      "sampling/importance_sampling_ratio/min": 0.6440463662147522,
+      "sampling/sampling_logp_difference/max": 0.48998022079467773,
+      "sampling/sampling_logp_difference/mean": 0.02640429511666298,
+      "step": 9,
+      "step_time": 80.34681812299641
+    },
+    {
+      "clip_ratio/high_max": 0.028179825632832944,
+      "clip_ratio/high_mean": 0.01559113833354786,
+      "clip_ratio/low_mean": 0.01464278216008097,
+      "clip_ratio/low_min": 0.006223290809430182,
+      "clip_ratio/region_mean": 0.03023392061004415,
+      "entropy": 0.5607042815536261,
+      "epoch": 0.0001,
+      "grad_norm": 1.2342119216918945,
+      "kl": 0.03045007959008217,
+      "learning_rate": 2.5714285714285714e-05,
+      "loss": -0.0159,
+      "step": 10,
+      "step_time": 28.650263912999435
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0005208333604969084,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10076.0,
+      "completions/max_terminated_length": 10076.0,
+      "completions/mean_length": 8311.21875,
+      "completions/mean_terminated_length": 8311.21875,
+      "completions/min_length": 1530.0,
+      "completions/min_terminated_length": 1530.0,
+      "entropy": 0.4887528121471405,
+      "epoch": 0.00011,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.4407812356948853,
+      "kl": 0.04280303395353258,
+      "learning_rate": 2.857142857142857e-05,
+      "loss": -0.0508,
+      "num_tokens": 1751757.0,
+      "reward": -0.26280224323272705,
+      "reward_std": 0.4824950098991394,
+      "rewards/rollout_eval_reward_func/mean": 0.1091209352016449,
+      "rewards/rollout_eval_reward_func/std": 0.22141531109809875,
+      "rewards/rollout_reward_func/mean": -0.26280224323272705,
+      "rewards/rollout_reward_func/std": 0.4825066328048706,
+      "sampling/importance_sampling_ratio/max": 2.2060391902923584,
+      "sampling/importance_sampling_ratio/mean": 1.003042221069336,
+      "sampling/importance_sampling_ratio/min": 0.505047619342804,
+      "sampling/sampling_logp_difference/max": 0.79119873046875,
+      "sampling/sampling_logp_difference/mean": 0.03998423367738724,
+      "step": 11,
+      "step_time": 81.20211481799561
+    },
+    {
+      "clip_ratio/high_max": 0.031166458851657808,
+      "clip_ratio/high_mean": 0.01714572956552729,
+      "clip_ratio/low_mean": 0.018567851395346224,
+      "clip_ratio/low_min": 0.005885701393708587,
+      "clip_ratio/region_mean": 0.0357135811354965,
+      "entropy": 0.47410433553159237,
+      "epoch": 0.00012,
+      "grad_norm": 1.048365831375122,
+      "kl": 0.08051084214821458,
+      "learning_rate": 3.142857142857143e-05,
+      "loss": -0.0558,
+      "step": 12,
+      "step_time": 29.28374841400546
+    },
+    {
+      "clip_ratio/high_max": 0.001953125,
+      "clip_ratio/high_mean": 0.0009765625,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0009765625,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10209.0,
+      "completions/max_terminated_length": 10209.0,
+      "completions/mean_length": 8161.71875,
+      "completions/mean_terminated_length": 8161.71875,
+      "completions/min_length": 1827.0,
+      "completions/min_terminated_length": 1827.0,
+      "entropy": 0.43679925985634327,
+      "epoch": 0.00013,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.0560696125030518,
+      "kl": 0.09263498219661415,
+      "learning_rate": 3.428571428571429e-05,
+      "loss": 0.1042,
+      "num_tokens": 2042327.0,
+      "reward": -0.02590048871934414,
+      "reward_std": 0.6161512732505798,
+      "rewards/rollout_eval_reward_func/mean": 0.16006097197532654,
+      "rewards/rollout_eval_reward_func/std": 0.2864827811717987,
+      "rewards/rollout_reward_func/mean": -0.02590048871934414,
+      "rewards/rollout_reward_func/std": 0.6041470170021057,
+      "sampling/importance_sampling_ratio/max": 2.7582640647888184,
+      "sampling/importance_sampling_ratio/mean": 0.9981331825256348,
+      "sampling/importance_sampling_ratio/min": 0.361401230096817,
+      "sampling/sampling_logp_difference/max": 1.0177664756774902,
+      "sampling/sampling_logp_difference/mean": 0.06089622899889946,
+      "step": 13,
+      "step_time": 85.01218143400365
+    },
+    {
+      "clip_ratio/high_max": 0.012486383900977671,
+      "clip_ratio/high_mean": 0.007805692031979561,
+      "clip_ratio/low_mean": 0.030729168094694614,
+      "clip_ratio/low_min": 0.015625000465661287,
+      "clip_ratio/region_mean": 0.038534860184881836,
+      "entropy": 0.41658624820411205,
+      "epoch": 0.00014,
+      "grad_norm": 1.044942855834961,
+      "kl": 0.16313170175999403,
+      "learning_rate": 3.7142857142857143e-05,
+      "loss": 0.1002,
+      "step": 14,
+      "step_time": 28.990433916003894
+    },
+    {
+      "clip_ratio/high_max": 0.00596590933855623,
+      "clip_ratio/high_mean": 0.0035037880297750235,
+      "clip_ratio/low_mean": 0.0005122950533404946,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.004016083083115518,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10134.0,
+      "completions/max_terminated_length": 10134.0,
+      "completions/mean_length": 8323.34375,
+      "completions/mean_terminated_length": 8323.34375,
+      "completions/min_length": 1934.0,
+      "completions/min_terminated_length": 1934.0,
+      "entropy": 0.44160761684179306,
+      "epoch": 0.00015,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.2544862031936646,
+      "kl": 0.21658248733729124,
+      "learning_rate": 4e-05,
+      "loss": 0.1199,
+      "num_tokens": 2337711.0,
+      "reward": -0.0776321142911911,
+      "reward_std": 0.5812347531318665,
+      "rewards/rollout_eval_reward_func/mean": 0.14151422679424286,
+      "rewards/rollout_eval_reward_func/std": 0.2538794279098511,
+      "rewards/rollout_reward_func/mean": -0.0776321142911911,
+      "rewards/rollout_reward_func/std": 0.5845968723297119,
+      "sampling/importance_sampling_ratio/max": 1.8725090026855469,
+      "sampling/importance_sampling_ratio/mean": 0.9912927150726318,
+      "sampling/importance_sampling_ratio/min": 0.1565917581319809,
+      "sampling/sampling_logp_difference/max": 1.8541131019592285,
+      "sampling/sampling_logp_difference/mean": 0.06762713938951492,
+      "step": 15,
+      "step_time": 87.63701662399762
+    },
+    {
+      "clip_ratio/high_max": 0.033285985700786114,
+      "clip_ratio/high_mean": 0.02006899402476847,
+      "clip_ratio/low_mean": 0.017902423918712884,
+      "clip_ratio/low_min": 0.008303140406496823,
+      "clip_ratio/region_mean": 0.03797141805989668,
+      "entropy": 0.43832515366375446,
+      "epoch": 0.00016,
+      "grad_norm": 1.1862040758132935,
+      "kl": 0.2433762801811099,
+      "learning_rate": 4.2857142857142856e-05,
+      "loss": 0.1137,
+      "step": 16,
+      "step_time": 30.26940473900322
+    },
+    {
+      "clip_ratio/high_max": 0.005208333604969084,
+      "clip_ratio/high_mean": 0.002604166802484542,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0031250001629814506,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10099.0,
+      "completions/max_terminated_length": 10099.0,
+      "completions/mean_length": 8931.625,
+      "completions/mean_terminated_length": 8931.625,
+      "completions/min_length": 2013.0,
+      "completions/min_terminated_length": 2013.0,
+      "entropy": 0.4058182891458273,
+      "epoch": 0.00017,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.206827998161316,
+      "kl": 0.1886005294509232,
+      "learning_rate": 4.5714285714285716e-05,
+      "loss": -0.0944,
+      "num_tokens": 2652765.0,
+      "reward": -0.0752231553196907,
+      "reward_std": 0.48041343688964844,
+      "rewards/rollout_eval_reward_func/mean": 0.10861280560493469,
+      "rewards/rollout_eval_reward_func/std": 0.2368263602256775,
+      "rewards/rollout_reward_func/mean": -0.0752231553196907,
+      "rewards/rollout_reward_func/std": 0.5091694593429565,
+      "sampling/importance_sampling_ratio/max": 2.2689177989959717,
+      "sampling/importance_sampling_ratio/mean": 1.0046234130859375,
+      "sampling/importance_sampling_ratio/min": 0.1846628040075302,
+      "sampling/sampling_logp_difference/max": 1.6892237663269043,
+      "sampling/sampling_logp_difference/mean": 0.06120520830154419,
+      "step": 17,
+      "step_time": 96.5394253049999
+    },
+    {
+      "clip_ratio/high_max": 0.0221070961561054,
+      "clip_ratio/high_mean": 0.013136881520040333,
+      "clip_ratio/low_mean": 0.005389189289417118,
+      "clip_ratio/low_min": 0.002066256827674806,
+      "clip_ratio/region_mean": 0.01852607080945745,
+      "entropy": 0.40752917528152466,
+      "epoch": 0.00018,
+      "grad_norm": 1.039859652519226,
+      "kl": 0.20007089478895068,
+      "learning_rate": 4.8571428571428576e-05,
+      "loss": -0.1064,
+      "step": 18,
+      "step_time": 29.607819763001316
+    },
+    {
+      "clip_ratio/high_max": 0.00424107164144516,
+      "clip_ratio/high_mean": 0.00212053582072258,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.00212053582072258,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9868.0,
+      "completions/max_terminated_length": 9868.0,
+      "completions/mean_length": 7739.625,
+      "completions/mean_terminated_length": 7739.625,
+      "completions/min_length": 1494.0,
+      "completions/min_terminated_length": 1494.0,
+      "entropy": 0.3824189379811287,
+      "epoch": 0.00019,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.1822205781936646,
+      "kl": 0.1706448094919324,
+      "learning_rate": 5.142857142857143e-05,
+      "loss": -0.1187,
+      "num_tokens": 2929452.0,
+      "reward": 0.1796756088733673,
+      "reward_std": 0.6716787815093994,
+      "rewards/rollout_eval_reward_func/mean": 0.25978150963783264,
+      "rewards/rollout_eval_reward_func/std": 0.31619328260421753,
+      "rewards/rollout_reward_func/mean": 0.1796756088733673,
+      "rewards/rollout_reward_func/std": 0.6625394821166992,
+      "sampling/importance_sampling_ratio/max": 1.8655627965927124,
+      "sampling/importance_sampling_ratio/mean": 1.0000479221343994,
+      "sampling/importance_sampling_ratio/min": 0.33482789993286133,
+      "sampling/sampling_logp_difference/max": 1.0941386222839355,
+      "sampling/sampling_logp_difference/mean": 0.04819408059120178,
+      "step": 19,
+      "step_time": 92.65558583299753
+    },
+    {
+      "clip_ratio/high_max": 0.030015080701559782,
+      "clip_ratio/high_mean": 0.018132540630176663,
+      "clip_ratio/low_mean": 0.03180725604761392,
+      "clip_ratio/low_min": 0.0052083334885537624,
+      "clip_ratio/region_mean": 0.049939796910621226,
+      "entropy": 0.3580914381891489,
+      "epoch": 0.0002,
+      "grad_norm": 1.152976155281067,
+      "kl": 0.2634436935186386,
+      "learning_rate": 5.428571428571428e-05,
+      "loss": -0.1272,
+      "step": 20,
+      "step_time": 28.27301450500272
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0020833335001952946,
+      "clip_ratio/low_min": 0.0010416667209938169,
+      "clip_ratio/region_mean": 0.0020833335001952946,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10426.0,
+      "completions/max_terminated_length": 10426.0,
+      "completions/mean_length": 7911.40625,
+      "completions/mean_terminated_length": 7911.40625,
+      "completions/min_length": 1040.0,
+      "completions/min_terminated_length": 1040.0,
+      "entropy": 0.3455618601292372,
+      "epoch": 0.00021,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.9142677187919617,
+      "kl": 0.2354841867927462,
+      "learning_rate": 5.714285714285714e-05,
+      "loss": -0.0904,
+      "num_tokens": 3211621.0,
+      "reward": 0.09562171995639801,
+      "reward_std": 0.6017146706581116,
+      "rewards/rollout_eval_reward_func/mean": 0.1835619956254959,
+      "rewards/rollout_eval_reward_func/std": 0.2800058424472809,
+      "rewards/rollout_reward_func/mean": 0.09562171995639801,
+      "rewards/rollout_reward_func/std": 0.5979344248771667,
+      "sampling/importance_sampling_ratio/max": 1.7227435111999512,
+      "sampling/importance_sampling_ratio/mean": 0.9981924295425415,
+      "sampling/importance_sampling_ratio/min": 0.38243889808654785,
+      "sampling/sampling_logp_difference/max": 0.961186408996582,
+      "sampling/sampling_logp_difference/mean": 0.04361895099282265,
+      "step": 21,
+      "step_time": 94.40408171299714
+    },
+    {
+      "clip_ratio/high_max": 0.03222161578014493,
+      "clip_ratio/high_mean": 0.0181941413320601,
+      "clip_ratio/low_mean": 0.02708333428017795,
+      "clip_ratio/low_min": 0.0062500000931322575,
+      "clip_ratio/region_mean": 0.04527747584506869,
+      "entropy": 0.3229655371978879,
+      "epoch": 0.00022,
+      "grad_norm": 0.8647798895835876,
+      "kl": 0.21354854525998235,
+      "learning_rate": 6e-05,
+      "loss": -0.1008,
+      "step": 22,
+      "step_time": 30.11174104199381
+    },
+    {
+      "clip_ratio/high_max": 0.0011160714784637094,
+      "clip_ratio/high_mean": 0.0005580357392318547,
+      "clip_ratio/low_mean": 0.0010995370685122907,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0016575728077441454,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10470.0,
+      "completions/max_terminated_length": 10470.0,
+      "completions/mean_length": 7568.375,
+      "completions/mean_terminated_length": 7568.375,
+      "completions/min_length": 2202.0,
+      "completions/min_terminated_length": 2202.0,
+      "entropy": 0.28525836300104856,
+      "epoch": 0.00023,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.0814907550811768,
+      "kl": 0.35280791157856584,
+      "learning_rate": 6.285714285714286e-05,
+      "loss": 0.016,
+      "num_tokens": 3482436.0,
+      "reward": 0.2800288200378418,
+      "reward_std": 0.7106037139892578,
+      "rewards/rollout_eval_reward_func/mean": 0.33079269528388977,
+      "rewards/rollout_eval_reward_func/std": 0.3271085023880005,
+      "rewards/rollout_reward_func/mean": 0.2800288200378418,
+      "rewards/rollout_reward_func/std": 0.6996307373046875,
+      "sampling/importance_sampling_ratio/max": 1.6482936143875122,
+      "sampling/importance_sampling_ratio/mean": 1.0002542734146118,
+      "sampling/importance_sampling_ratio/min": 0.2758394777774811,
+      "sampling/sampling_logp_difference/max": 1.2879362106323242,
+      "sampling/sampling_logp_difference/mean": 0.0332026481628418,
+      "step": 23,
+      "step_time": 93.99063302500326
+    },
+    {
+      "clip_ratio/high_max": 0.01396139187272638,
+      "clip_ratio/high_mean": 0.007690923230256885,
+      "clip_ratio/low_mean": 0.01880787085974589,
+      "clip_ratio/low_min": 0.0031250001629814506,
+      "clip_ratio/region_mean": 0.02649879432283342,
+      "entropy": 0.2676102966070175,
+      "epoch": 0.00024,
+      "grad_norm": 0.8727543354034424,
+      "kl": 0.3772396189160645,
+      "learning_rate": 6.571428571428571e-05,
+      "loss": 0.0057,
+      "step": 24,
+      "step_time": 29.4178187339985
+    },
+    {
+      "clip_ratio/high_max": 0.004613095428794622,
+      "clip_ratio/high_mean": 0.002985895553138107,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0035067289136350155,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9569.0,
+      "completions/max_terminated_length": 9569.0,
+      "completions/mean_length": 7533.28125,
+      "completions/mean_terminated_length": 7533.28125,
+      "completions/min_length": 2449.0,
+      "completions/min_terminated_length": 2449.0,
+      "entropy": 0.2505391649901867,
+      "epoch": 0.00025,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.017386555671692,
+      "kl": 0.21523633878678083,
+      "learning_rate": 6.857142857142858e-05,
+      "loss": 0.0242,
+      "num_tokens": 3751699.0,
+      "reward": 0.3911706805229187,
+      "reward_std": 0.638326108455658,
+      "rewards/rollout_eval_reward_func/mean": 0.36318597197532654,
+      "rewards/rollout_eval_reward_func/std": 0.3184514343738556,
+      "rewards/rollout_reward_func/mean": 0.3911706805229187,
+      "rewards/rollout_reward_func/std": 0.6562069654464722,
+      "sampling/importance_sampling_ratio/max": 1.5404945611953735,
+      "sampling/importance_sampling_ratio/mean": 0.9984301328659058,
+      "sampling/importance_sampling_ratio/min": 0.4790920615196228,
+      "sampling/sampling_logp_difference/max": 0.7358624935150146,
+      "sampling/sampling_logp_difference/mean": 0.025531694293022156,
+      "step": 25,
+      "step_time": 92.37763964700025
+    },
+    {
+      "clip_ratio/high_max": 0.02074831852223724,
+      "clip_ratio/high_mean": 0.014075796061661094,
+      "clip_ratio/low_mean": 0.024038826406467706,
+      "clip_ratio/low_min": 0.004687500186264515,
+      "clip_ratio/region_mean": 0.0381146224681288,
+      "entropy": 0.24146342556923628,
+      "epoch": 0.00026,
+      "grad_norm": 1.08539617061615,
+      "kl": 0.242179695982486,
+      "learning_rate": 7.142857142857143e-05,
+      "loss": 0.0152,
+      "step": 26,
+      "step_time": 27.09601488199405
+    },
+    {
+      "clip_ratio/high_max": 0.004924242617562413,
+      "clip_ratio/high_mean": 0.0024621213087812066,
+      "clip_ratio/low_mean": 0.0015625000814907253,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.004024621390271932,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9714.0,
+      "completions/max_terminated_length": 9714.0,
+      "completions/mean_length": 7341.125,
+      "completions/mean_terminated_length": 7341.125,
+      "completions/min_length": 834.0,
+      "completions/min_terminated_length": 834.0,
+      "entropy": 0.24662253353744745,
+      "epoch": 0.00027,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.0926475524902344,
+      "kl": 0.201888975687325,
+      "learning_rate": 7.428571428571429e-05,
+      "loss": -0.0651,
+      "num_tokens": 4014835.0,
+      "reward": 0.26619410514831543,
+      "reward_std": 0.6366387009620667,
+      "rewards/rollout_eval_reward_func/mean": 0.31529471278190613,
+      "rewards/rollout_eval_reward_func/std": 0.3177616000175476,
+      "rewards/rollout_reward_func/mean": 0.26619410514831543,
+      "rewards/rollout_reward_func/std": 0.6645346879959106,
+      "sampling/importance_sampling_ratio/max": 1.7210402488708496,
+      "sampling/importance_sampling_ratio/mean": 0.9990845918655396,
+      "sampling/importance_sampling_ratio/min": 0.46208029985427856,
+      "sampling/sampling_logp_difference/max": 0.7720166444778442,
+      "sampling/sampling_logp_difference/mean": 0.024712545797228813,
+      "step": 27,
+      "step_time": 90.07543276499928
+    },
+    {
+      "clip_ratio/high_max": 0.033208509092219174,
+      "clip_ratio/high_mean": 0.018557379313278943,
+      "clip_ratio/low_mean": 0.035281969350762665,
+      "clip_ratio/low_min": 0.011458333698101342,
+      "clip_ratio/region_mean": 0.05383934878045693,
+      "entropy": 0.24193121027201414,
+      "epoch": 0.00028,
+      "grad_norm": 0.9876235127449036,
+      "kl": 0.26401366433128715,
+      "learning_rate": 7.714285714285715e-05,
+      "loss": -0.073,
+      "step": 28,
+      "step_time": 27.219164144002207
+    },
+    {
+      "clip_ratio/high_max": 0.0010775862028822303,
+      "clip_ratio/high_mean": 0.0005387931014411151,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0005387931014411151,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9855.0,
+      "completions/max_terminated_length": 9855.0,
+      "completions/mean_length": 7361.875,
+      "completions/mean_terminated_length": 7361.875,
+      "completions/min_length": 842.0,
+      "completions/min_terminated_length": 842.0,
+      "entropy": 0.21860306337475777,
+      "epoch": 0.00029,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.9292377233505249,
+      "kl": 0.20060417288914323,
+      "learning_rate": 8e-05,
+      "loss": -0.0977,
+      "num_tokens": 4278506.0,
+      "reward": 0.30670806765556335,
+      "reward_std": 0.652392566204071,
+      "rewards/rollout_eval_reward_func/mean": 0.3278709352016449,
+      "rewards/rollout_eval_reward_func/std": 0.31351709365844727,
+      "rewards/rollout_reward_func/mean": 0.30670806765556335,
+      "rewards/rollout_reward_func/std": 0.6815608143806458,
+      "sampling/importance_sampling_ratio/max": 1.4481010437011719,
+      "sampling/importance_sampling_ratio/mean": 1.0026426315307617,
+      "sampling/importance_sampling_ratio/min": 0.5693169832229614,
+      "sampling/sampling_logp_difference/max": 0.5633178949356079,
+      "sampling/sampling_logp_difference/mean": 0.01894025132060051,
+      "step": 29,
+      "step_time": 88.37378997200358
+    },
+    {
+      "clip_ratio/high_max": 0.02580322092399001,
+      "clip_ratio/high_mean": 0.015042814193293452,
+      "clip_ratio/low_mean": 0.015608090267051011,
+      "clip_ratio/low_min": 0.0020833334419876337,
+      "clip_ratio/region_mean": 0.030650904460344464,
+      "entropy": 0.2232473948970437,
+      "epoch": 0.0003,
+      "grad_norm": 0.6086679697036743,
+      "kl": 0.19415233470499516,
+      "learning_rate": 8.285714285714287e-05,
+      "loss": -0.1081,
+      "step": 30,
+      "step_time": 28.619991764000588
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0005208333604969084,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10726.0,
+      "completions/max_terminated_length": 10726.0,
+      "completions/mean_length": 7164.65625,
+      "completions/mean_terminated_length": 7164.65625,
+      "completions/min_length": 470.0,
+      "completions/min_terminated_length": 470.0,
+      "entropy": 0.23761425912380219,
+      "epoch": 0.00031,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.050552487373352,
+      "kl": 0.25638002483174205,
+      "learning_rate": 8.571428571428571e-05,
+      "loss": 0.012,
+      "num_tokens": 4536097.0,
+      "reward": 0.3345087766647339,
+      "reward_std": 0.5485296249389648,
+      "rewards/rollout_eval_reward_func/mean": 0.3090701103210449,
+      "rewards/rollout_eval_reward_func/std": 0.32714226841926575,
+      "rewards/rollout_reward_func/mean": 0.3345087766647339,
+      "rewards/rollout_reward_func/std": 0.6012357473373413,
+      "sampling/importance_sampling_ratio/max": 1.438549518585205,
+      "sampling/importance_sampling_ratio/mean": 1.0011037588119507,
+      "sampling/importance_sampling_ratio/min": 0.6349728107452393,
+      "sampling/sampling_logp_difference/max": 0.45417308807373047,
+      "sampling/sampling_logp_difference/mean": 0.015337169170379639,
+      "step": 31,
+      "step_time": 92.49027231299806
+    },
+    {
+      "clip_ratio/high_max": 0.03391559107694775,
+      "clip_ratio/high_mean": 0.018867517996113747,
+      "clip_ratio/low_mean": 0.044338769221212715,
+      "clip_ratio/low_min": 0.008333333535119891,
+      "clip_ratio/region_mean": 0.06320628756657243,
+      "entropy": 0.22916866652667522,
+      "epoch": 0.00032,
+      "grad_norm": 1.028586745262146,
+      "kl": 0.3105860697105527,
+      "learning_rate": 8.857142857142857e-05,
+      "loss": 0.0055,
+      "step": 32,
+      "step_time": 29.399824877003994
+    },
+    {
+      "clip_ratio/high_max": 0.0024519230937585235,
+      "clip_ratio/high_mean": 0.0012259615468792617,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0012259615468792617,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10128.0,
+      "completions/max_terminated_length": 10128.0,
+      "completions/mean_length": 7357.46875,
+      "completions/mean_terminated_length": 7357.46875,
+      "completions/min_length": 1917.0,
+      "completions/min_terminated_length": 1917.0,
+      "entropy": 0.2557551637291908,
+      "epoch": 0.00033,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.9717881083488464,
+      "kl": 0.2046954189427197,
+      "learning_rate": 9.142857142857143e-05,
+      "loss": 0.0245,
+      "num_tokens": 4799621.0,
+      "reward": 0.35216301679611206,
+      "reward_std": 0.6164546608924866,
+      "rewards/rollout_eval_reward_func/mean": 0.3365091383457184,
+      "rewards/rollout_eval_reward_func/std": 0.3354848027229309,
+      "rewards/rollout_reward_func/mean": 0.35216301679611206,
+      "rewards/rollout_reward_func/std": 0.6309141516685486,
+      "sampling/importance_sampling_ratio/max": 1.333243727684021,
+      "sampling/importance_sampling_ratio/mean": 1.0005223751068115,
+      "sampling/importance_sampling_ratio/min": 0.7339702248573303,
+      "sampling/sampling_logp_difference/max": 0.30928683280944824,
+      "sampling/sampling_logp_difference/mean": 0.014704002998769283,
+      "step": 33,
+      "step_time": 89.53553034700417
+    },
+    {
+      "clip_ratio/high_max": 0.01991061063017696,
+      "clip_ratio/high_mean": 0.011966300604399294,
+      "clip_ratio/low_mean": 0.02272569522028789,
+      "clip_ratio/low_min": 0.009722222457639873,
+      "clip_ratio/region_mean": 0.03469199570827186,
+      "entropy": 0.2428069869056344,
+      "epoch": 0.00034,
+      "grad_norm": 0.685612142086029,
+      "kl": 0.2513351505622268,
+      "learning_rate": 9.428571428571429e-05,
+      "loss": 0.0129,
+      "step": 34,
+      "step_time": 28.25809028400181
+    },
+    {
+      "clip_ratio/high_max": 0.0021990741370245814,
+      "clip_ratio/high_mean": 0.0010995370685122907,
+      "clip_ratio/low_mean": 0.0010416667209938169,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0021412037895061076,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9665.0,
+      "completions/max_terminated_length": 9665.0,
+      "completions/mean_length": 8000.09375,
+      "completions/mean_terminated_length": 8000.09375,
+      "completions/min_length": 4295.0,
+      "completions/min_terminated_length": 4295.0,
+      "entropy": 0.2354184165596962,
+      "epoch": 0.00035,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.040405035018921,
+      "kl": 0.1770010399632156,
+      "learning_rate": 9.714285714285715e-05,
+      "loss": 0.1517,
+      "num_tokens": 5084103.0,
+      "reward": 0.3385156989097595,
+      "reward_std": 0.5189785957336426,
+      "rewards/rollout_eval_reward_func/mean": 0.23996442556381226,
+      "rewards/rollout_eval_reward_func/std": 0.31991085410118103,
+      "rewards/rollout_reward_func/mean": 0.3385156989097595,
+      "rewards/rollout_reward_func/std": 0.5693588852882385,
+      "sampling/importance_sampling_ratio/max": 1.4071576595306396,
+      "sampling/importance_sampling_ratio/mean": 0.9996304512023926,
+      "sampling/importance_sampling_ratio/min": 0.5387703776359558,
+      "sampling/sampling_logp_difference/max": 0.6184659004211426,
+      "sampling/sampling_logp_difference/mean": 0.015029089525341988,
+      "step": 35,
+      "step_time": 95.05921310200392
+    },
+    {
+      "clip_ratio/high_max": 0.026263557723723352,
+      "clip_ratio/high_mean": 0.014173445466440171,
+      "clip_ratio/low_mean": 0.02787990286014974,
+      "clip_ratio/low_min": 0.007291667046956718,
+      "clip_ratio/region_mean": 0.04205334832658991,
+      "entropy": 0.21858789399266243,
+      "epoch": 0.00036,
+      "grad_norm": 1.0455042123794556,
+      "kl": 0.2051441869698465,
+      "learning_rate": 0.0001,
+      "loss": 0.1403,
+      "step": 36,
+      "step_time": 27.85193802100366
+    },
+    {
+      "clip_ratio/high_max": 0.007164939888752997,
+      "clip_ratio/high_mean": 0.0035824699443764985,
+      "clip_ratio/low_mean": 0.0015625000232830644,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.005144969967659563,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9514.0,
+      "completions/max_terminated_length": 9514.0,
+      "completions/mean_length": 6029.25,
+      "completions/mean_terminated_length": 6029.25,
+      "completions/min_length": 1061.0,
+      "completions/min_terminated_length": 1061.0,
+      "entropy": 0.21716525312513113,
+      "epoch": 0.00037,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.8604521751403809,
+      "kl": 0.23097522975876927,
+      "learning_rate": 9.999736485702831e-05,
+      "loss": -0.0709,
+      "num_tokens": 5305345.0,
+      "reward": 0.41453179717063904,
+      "reward_std": 0.7797224521636963,
+      "rewards/rollout_eval_reward_func/mean": 0.4568089246749878,
+      "rewards/rollout_eval_reward_func/std": 0.28734299540519714,
+      "rewards/rollout_reward_func/mean": 0.41453179717063904,
+      "rewards/rollout_reward_func/std": 0.755694568157196,
+      "sampling/importance_sampling_ratio/max": 1.4738141298294067,
+      "sampling/importance_sampling_ratio/mean": 1.000828742980957,
+      "sampling/importance_sampling_ratio/min": 0.7324953079223633,
+      "sampling/sampling_logp_difference/max": 0.3878536820411682,
+      "sampling/sampling_logp_difference/mean": 0.013184964656829834,
+      "step": 37,
+      "step_time": 76.87407001600332
+    },
+    {
+      "clip_ratio/high_max": 0.04774210066534579,
+      "clip_ratio/high_mean": 0.02752261853311211,
+      "clip_ratio/low_mean": 0.03158482233993709,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.059107440523803234,
+      "entropy": 0.21499714627861977,
+      "epoch": 0.00038,
+      "grad_norm": 1.026845932006836,
+      "kl": 0.3676267918199301,
+      "learning_rate": 9.998945979845876e-05,
+      "loss": -0.0694,
+      "step": 38,
+      "step_time": 27.58343887600313
+    },
+    {
+      "clip_ratio/high_max": 0.006285919691435993,
+      "clip_ratio/high_mean": 0.0031429598457179964,
+      "clip_ratio/low_mean": 0.0010416667209938169,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.004184626566711813,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9353.0,
+      "completions/max_terminated_length": 9353.0,
+      "completions/mean_length": 6221.78125,
+      "completions/mean_terminated_length": 6221.78125,
+      "completions/min_length": 1175.0,
+      "completions/min_terminated_length": 1175.0,
+      "entropy": 0.21314978785812855,
+      "epoch": 0.00039,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.1063776016235352,
+      "kl": 0.28443425707519054,
+      "learning_rate": 9.997628593527586e-05,
+      "loss": 0.1657,
+      "num_tokens": 5533203.0,
+      "reward": 0.5931290984153748,
+      "reward_std": 0.5068180561065674,
+      "rewards/rollout_eval_reward_func/mean": 0.4369918704032898,
+      "rewards/rollout_eval_reward_func/std": 0.2919425666332245,
+      "rewards/rollout_reward_func/mean": 0.5931290984153748,
+      "rewards/rollout_reward_func/std": 0.6152276396751404,
+      "sampling/importance_sampling_ratio/max": 1.4768017530441284,
+      "sampling/importance_sampling_ratio/mean": 0.9989122152328491,
+      "sampling/importance_sampling_ratio/min": 0.7442160248756409,
+      "sampling/sampling_logp_difference/max": 0.3898787498474121,
+      "sampling/sampling_logp_difference/mean": 0.011076296679675579,
+      "step": 39,
+      "step_time": 80.26773473300273
+    },
+    {
+      "clip_ratio/high_max": 0.03581550612580031,
+      "clip_ratio/high_mean": 0.021467003040015697,
+      "clip_ratio/low_mean": 0.019476010755170137,
+      "clip_ratio/low_min": 0.0031250000465661287,
+      "clip_ratio/region_mean": 0.04094301396980882,
+      "entropy": 0.2001811731606722,
+      "epoch": 0.0004,
+      "grad_norm": 0.8571550250053406,
+      "kl": 0.39517259504646063,
+      "learning_rate": 9.995784511894694e-05,
+      "loss": 0.1561,
+      "step": 40,
+      "step_time": 26.113719172002675
+    },
+    {
+      "clip_ratio/high_max": 0.0027173913549631834,
+      "clip_ratio/high_mean": 0.0013586956774815917,
+      "clip_ratio/low_mean": 0.003238224715460092,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0045969203929416835,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9876.0,
+      "completions/max_terminated_length": 9876.0,
+      "completions/mean_length": 7216.5625,
+      "completions/mean_terminated_length": 7216.5625,
+      "completions/min_length": 1879.0,
+      "completions/min_terminated_length": 1879.0,
+      "entropy": 0.2681358586996794,
+      "epoch": 0.00041,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.3309671878814697,
+      "kl": 0.2410402470268309,
+      "learning_rate": 9.993413994116206e-05,
+      "loss": 0.1903,
+      "num_tokens": 5792478.0,
+      "reward": 0.471214234828949,
+      "reward_std": 0.5625734329223633,
+      "rewards/rollout_eval_reward_func/mean": 0.3643292784690857,
+      "rewards/rollout_eval_reward_func/std": 0.34053289890289307,
+      "rewards/rollout_reward_func/mean": 0.471214234828949,
+      "rewards/rollout_reward_func/std": 0.6072424650192261,
+      "sampling/importance_sampling_ratio/max": 1.8356192111968994,
+      "sampling/importance_sampling_ratio/mean": 1.0007987022399902,
+      "sampling/importance_sampling_ratio/min": 0.4829617738723755,
+      "sampling/sampling_logp_difference/max": 0.7278177738189697,
+      "sampling/sampling_logp_difference/mean": 0.014709306880831718,
+      "step": 41,
+      "step_time": 87.47009326799707
+    },
+    {
+      "clip_ratio/high_max": 0.034506134572438896,
+      "clip_ratio/high_mean": 0.01836913888109848,
+      "clip_ratio/low_mean": 0.03956068912521005,
+      "clip_ratio/low_min": 0.012500000651925802,
+      "clip_ratio/region_mean": 0.05792982783168554,
+      "entropy": 0.27205855678766966,
+      "epoch": 0.00042,
+      "grad_norm": 1.0188957452774048,
+      "kl": 0.30527770798653364,
+      "learning_rate": 9.990517373346957e-05,
+      "loss": 0.1841,
+      "step": 42,
+      "step_time": 27.952364619004584
+    },
+    {
+      "clip_ratio/high_max": 0.005300949211232364,
+      "clip_ratio/high_mean": 0.002650474605616182,
+      "clip_ratio/low_mean": 0.0015625000814907253,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.004212974687106907,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10094.0,
+      "completions/max_terminated_length": 10094.0,
+      "completions/mean_length": 6369.84375,
+      "completions/mean_terminated_length": 6369.84375,
+      "completions/min_length": 701.0,
+      "completions/min_terminated_length": 701.0,
+      "entropy": 0.24548510648310184,
+      "epoch": 0.00043,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.8371849656105042,
+      "kl": 0.22607734380289912,
+      "learning_rate": 9.98709505668081e-05,
+      "loss": -0.1383,
+      "num_tokens": 6024570.0,
+      "reward": 0.5083565711975098,
+      "reward_std": 0.7129669785499573,
+      "rewards/rollout_eval_reward_func/mean": 0.4181910753250122,
+      "rewards/rollout_eval_reward_func/std": 0.3106958866119385,
+      "rewards/rollout_reward_func/mean": 0.5083565711975098,
+      "rewards/rollout_reward_func/std": 0.679851770401001,
+      "sampling/importance_sampling_ratio/max": 1.6035348176956177,
+      "sampling/importance_sampling_ratio/mean": 1.0009479522705078,
+      "sampling/importance_sampling_ratio/min": 0.7113155722618103,
+      "sampling/sampling_logp_difference/max": 0.4722104072570801,
+      "sampling/sampling_logp_difference/mean": 0.010827964171767235,
+      "step": 43,
+      "step_time": 81.8608712560017
+    },
+    {
+      "clip_ratio/high_max": 0.022805775748565793,
+      "clip_ratio/high_mean": 0.01218413794413209,
+      "clip_ratio/low_mean": 0.026488096278626472,
+      "clip_ratio/low_min": 0.0020833334419876337,
+      "clip_ratio/region_mean": 0.03867223463021219,
+      "entropy": 0.2484031356871128,
+      "epoch": 0.00044,
+      "grad_norm": 0.6352972388267517,
+      "kl": 0.24903920874930918,
+      "learning_rate": 9.983147525093428e-05,
+      "loss": -0.1456,
+      "step": 44,
+      "step_time": 28.312056484001005
+    },
+    {
+      "clip_ratio/high_max": 0.0020833334419876337,
+      "clip_ratio/high_mean": 0.0010416667209938169,
+      "clip_ratio/low_mean": 0.0010416667209938169,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0020833334419876337,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10030.0,
+      "completions/max_terminated_length": 10030.0,
+      "completions/mean_length": 7470.40625,
+      "completions/mean_terminated_length": 7470.40625,
+      "completions/min_length": 3212.0,
+      "completions/min_terminated_length": 3212.0,
+      "entropy": 0.26859680097550154,
+      "epoch": 0.00045,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.9950742125511169,
+      "kl": 0.2715269709005952,
+      "learning_rate": 9.978675333374685e-05,
+      "loss": 0.1354,
+      "num_tokens": 6292193.0,
+      "reward": 0.31536591053009033,
+      "reward_std": 0.626213550567627,
+      "rewards/rollout_eval_reward_func/mean": 0.2950965166091919,
+      "rewards/rollout_eval_reward_func/std": 0.3288768529891968,
+      "rewards/rollout_reward_func/mean": 0.31536591053009033,
+      "rewards/rollout_reward_func/std": 0.6272794604301453,
+      "sampling/importance_sampling_ratio/max": 1.2761257886886597,
+      "sampling/importance_sampling_ratio/mean": 0.9995177388191223,
+      "sampling/importance_sampling_ratio/min": 0.6398259401321411,
+      "sampling/sampling_logp_difference/max": 0.44655919075012207,
+      "sampling/sampling_logp_difference/mean": 0.01289924792945385,
+      "step": 45,
+      "step_time": 89.98842330299703
+    },
+    {
+      "clip_ratio/high_max": 0.029475471819750965,
+      "clip_ratio/high_mean": 0.017039196158293635,
+      "clip_ratio/low_mean": 0.035884891636669636,
+      "clip_ratio/low_min": 0.014583333861082792,
+      "clip_ratio/region_mean": 0.05292408773675561,
+      "entropy": 0.25596251618117094,
+      "epoch": 0.00046,
+      "grad_norm": 1.0492225885391235,
+      "kl": 0.4555607410147786,
+      "learning_rate": 9.973679110050689e-05,
+      "loss": 0.1236,
+      "step": 46,
+      "step_time": 28.10059149600238
+    },
+    {
+      "clip_ratio/high_max": 0.005558473523706198,
+      "clip_ratio/high_mean": 0.002779236761853099,
+      "clip_ratio/low_mean": 0.0031250001629814506,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.005904236924834549,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10171.0,
+      "completions/max_terminated_length": 10171.0,
+      "completions/mean_length": 7720.34375,
+      "completions/mean_terminated_length": 7720.34375,
+      "completions/min_length": 2255.0,
+      "completions/min_terminated_length": 2255.0,
+      "entropy": 0.21848125476390123,
+      "epoch": 0.00047,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.9580699801445007,
+      "kl": 0.2126072864048183,
+      "learning_rate": 9.968159557295458e-05,
+      "loss": 0.2391,
+      "num_tokens": 6567972.0,
+      "reward": 0.585047721862793,
+      "reward_std": 0.4849390387535095,
+      "rewards/rollout_eval_reward_func/mean": 0.35200709104537964,
+      "rewards/rollout_eval_reward_func/std": 0.33855971693992615,
+      "rewards/rollout_reward_func/mean": 0.585047721862793,
+      "rewards/rollout_reward_func/std": 0.4694308936595917,
+      "sampling/importance_sampling_ratio/max": 1.3900582790374756,
+      "sampling/importance_sampling_ratio/mean": 1.0005149841308594,
+      "sampling/importance_sampling_ratio/min": 0.5463369488716125,
+      "sampling/sampling_logp_difference/max": 0.6045193672180176,
+      "sampling/sampling_logp_difference/mean": 0.012745920568704605,
+      "step": 47,
+      "step_time": 91.15270540599704
+    },
+    {
+      "clip_ratio/high_max": 0.03133936191443354,
+      "clip_ratio/high_mean": 0.017232181096915156,
+      "clip_ratio/low_mean": 0.04218750132713467,
+      "clip_ratio/low_min": 0.01145833358168602,
+      "clip_ratio/region_mean": 0.059419682365842164,
+      "entropy": 0.23045554850250483,
+      "epoch": 0.00048,
+      "grad_norm": 1.2474925518035889,
+      "kl": 0.18294932693243027,
+      "learning_rate": 9.962117450832225e-05,
+      "loss": 0.238,
+      "step": 48,
+      "step_time": 29.046616760999314
+    },
+    {
+      "clip_ratio/high_max": 0.006842764443717897,
+      "clip_ratio/high_mean": 0.0034213822218589485,
+      "clip_ratio/low_mean": 0.0015625000232830644,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.004983882245142013,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10017.0,
+      "completions/max_terminated_length": 10017.0,
+      "completions/mean_length": 7918.40625,
+      "completions/mean_terminated_length": 7918.40625,
+      "completions/min_length": 1876.0,
+      "completions/min_terminated_length": 1876.0,
+      "entropy": 0.24847039952874184,
+      "epoch": 0.00049,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.1504110097885132,
+      "kl": 0.3213502997532487,
+      "learning_rate": 9.955553639824423e-05,
+      "loss": 0.1906,
+      "num_tokens": 6849638.0,
+      "reward": 0.39189645648002625,
+      "reward_std": 0.5209037065505981,
+      "rewards/rollout_eval_reward_func/mean": 0.2815040647983551,
+      "rewards/rollout_eval_reward_func/std": 0.332853227853775,
+      "rewards/rollout_reward_func/mean": 0.39189645648002625,
+      "rewards/rollout_reward_func/std": 0.5881980061531067,
+      "sampling/importance_sampling_ratio/max": 1.4030216932296753,
+      "sampling/importance_sampling_ratio/mean": 0.9992052316665649,
+      "sampling/importance_sampling_ratio/min": 0.6490213871002197,
+      "sampling/sampling_logp_difference/max": 0.43228960037231445,
+      "sampling/sampling_logp_difference/mean": 0.011766092851758003,
+      "step": 49,
+      "step_time": 91.98302743600289
+    },
+    {
+      "clip_ratio/high_max": 0.030021664802916348,
+      "clip_ratio/high_mean": 0.01896916568512097,
+      "clip_ratio/low_mean": 0.02840909146470949,
+      "clip_ratio/low_min": 0.0031250001629814506,
+      "clip_ratio/region_mean": 0.04737825732445344,
+      "entropy": 0.22083801217377186,
+      "epoch": 0.0005,
+      "grad_norm": 1.493245005607605,
+      "kl": 0.6161252139136195,
+      "learning_rate": 9.948469046756344e-05,
+      "loss": 0.1882,
+      "step": 50,
+      "step_time": 29.706524382998396
+    },
+    {
+      "clip_ratio/high_max": 0.007615459966473281,
+      "clip_ratio/high_mean": 0.0038077299832366407,
+      "clip_ratio/low_mean": 0.0010416667209938169,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0048493967042304575,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10057.0,
+      "completions/max_terminated_length": 10057.0,
+      "completions/mean_length": 7061.03125,
+      "completions/mean_terminated_length": 7061.03125,
+      "completions/min_length": 2525.0,
+      "completions/min_terminated_length": 2525.0,
+      "entropy": 0.24380221962928772,
+      "epoch": 0.00051,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.1951247453689575,
+      "kl": 0.28687382210046053,
+      "learning_rate": 9.940864667303489e-05,
+      "loss": 0.1425,
+      "num_tokens": 7103728.0,
+      "reward": 0.406146377325058,
+      "reward_std": 0.6755715608596802,
+      "rewards/rollout_eval_reward_func/mean": 0.3859247863292694,
+      "rewards/rollout_eval_reward_func/std": 0.33643871545791626,
+      "rewards/rollout_reward_func/mean": 0.406146377325058,
+      "rewards/rollout_reward_func/std": 0.6774359345436096,
+      "sampling/importance_sampling_ratio/max": 1.367674469947815,
+      "sampling/importance_sampling_ratio/mean": 0.9991032481193542,
+      "sampling/importance_sampling_ratio/min": 0.6542518734931946,
+      "sampling/sampling_logp_difference/max": 0.4242628812789917,
+      "sampling/sampling_logp_difference/mean": 0.012621527537703514,
+      "step": 51,
+      "step_time": 85.52853098199739
+    },
+    {
+      "clip_ratio/high_max": 0.023708798456937075,
+      "clip_ratio/high_mean": 0.015155438333749771,
+      "clip_ratio/low_mean": 0.02644535672152415,
+      "clip_ratio/low_min": 0.009695513173937798,
+      "clip_ratio/region_mean": 0.04160079546272755,
+      "entropy": 0.24589570611715317,
+      "epoch": 0.00052,
+      "grad_norm": 0.6901561617851257,
+      "kl": 0.2809536149725318,
+      "learning_rate": 9.932741570192633e-05,
+      "loss": 0.1278,
+      "step": 52,
+      "step_time": 28.923457664002854
+    },
+    {
+      "clip_ratio/high_max": 0.0011160714784637094,
+      "clip_ratio/high_mean": 0.0005580357392318547,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.001078869099728763,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10169.0,
+      "completions/max_terminated_length": 10169.0,
+      "completions/mean_length": 7814.28125,
+      "completions/mean_terminated_length": 7814.28125,
+      "completions/min_length": 1989.0,
+      "completions/min_terminated_length": 1989.0,
+      "entropy": 0.21275948453694582,
+      "epoch": 0.00053,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.7513790130615234,
+      "kl": 0.23512054793536663,
+      "learning_rate": 9.924100897051629e-05,
+      "loss": 0.1945,
+      "num_tokens": 7382261.0,
+      "reward": 0.42753756046295166,
+      "reward_std": 0.49785035848617554,
+      "rewards/rollout_eval_reward_func/mean": 0.26969003677368164,
+      "rewards/rollout_eval_reward_func/std": 0.3341839611530304,
+      "rewards/rollout_reward_func/mean": 0.42753756046295166,
+      "rewards/rollout_reward_func/std": 0.49307680130004883,
+      "sampling/importance_sampling_ratio/max": 1.3325144052505493,
+      "sampling/importance_sampling_ratio/mean": 0.9995752573013306,
+      "sampling/importance_sampling_ratio/min": 0.6147154569625854,
+      "sampling/sampling_logp_difference/max": 0.48659586906433105,
+      "sampling/sampling_logp_difference/mean": 0.010477245785295963,
+      "step": 53,
+      "step_time": 89.77598898800352
+    },
+    {
+      "clip_ratio/high_max": 0.014756215270608664,
+      "clip_ratio/high_mean": 0.007378107635304332,
+      "clip_ratio/low_mean": 0.026041667733807117,
+      "clip_ratio/low_min": 0.008333333651535213,
+      "clip_ratio/region_mean": 0.03341977560194209,
+      "entropy": 0.20128578413277864,
+      "epoch": 0.00054,
+      "grad_norm": 0.570249080657959,
+      "kl": 0.24723996873944998,
+      "learning_rate": 9.914943862248966e-05,
+      "loss": 0.1836,
+      "step": 54,
+      "step_time": 28.66781206799169
+    },
+    {
+      "clip_ratio/high_max": 0.005409664008766413,
+      "clip_ratio/high_mean": 0.0027048320043832064,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.003225665364880115,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9612.0,
+      "completions/max_terminated_length": 9612.0,
+      "completions/mean_length": 7466.40625,
+      "completions/mean_terminated_length": 7466.40625,
+      "completions/min_length": 897.0,
+      "completions/min_terminated_length": 897.0,
+      "entropy": 0.2242852784693241,
+      "epoch": 0.00055,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.7776551246643066,
+      "kl": 0.22325131320394576,
+      "learning_rate": 9.905271752723088e-05,
+      "loss": 0.0206,
+      "num_tokens": 7648812.0,
+      "reward": 0.40199464559555054,
+      "reward_std": 0.5598407983779907,
+      "rewards/rollout_eval_reward_func/mean": 0.3375253975391388,
+      "rewards/rollout_eval_reward_func/std": 0.351279616355896,
+      "rewards/rollout_reward_func/mean": 0.40199464559555054,
+      "rewards/rollout_reward_func/std": 0.5975609421730042,
+      "sampling/importance_sampling_ratio/max": 1.317135214805603,
+      "sampling/importance_sampling_ratio/mean": 0.9976714849472046,
+      "sampling/importance_sampling_ratio/min": 0.6417545676231384,
+      "sampling/sampling_logp_difference/max": 0.4435492753982544,
+      "sampling/sampling_logp_difference/mean": 0.012365585193037987,
+      "step": 55,
+      "step_time": 90.48158546899867
+    },
+    {
+      "clip_ratio/high_max": 0.02967093954794109,
+      "clip_ratio/high_mean": 0.01639796979725361,
+      "clip_ratio/low_mean": 0.017361111822538078,
+      "clip_ratio/low_min": 0.0031250001629814506,
+      "clip_ratio/region_mean": 0.03375908185262233,
+      "entropy": 0.2281673550605774,
+      "epoch": 0.00056,
+      "grad_norm": 0.48372626304626465,
+      "kl": 0.23605143558233976,
+      "learning_rate": 9.895085927801542e-05,
+      "loss": 0.0086,
+      "step": 56,
+      "step_time": 27.291444884000157
+    },
+    {
+      "clip_ratio/high_max": 0.003557769814506173,
+      "clip_ratio/high_mean": 0.0017788849072530866,
+      "clip_ratio/low_mean": 0.0015625000814907253,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.003341384930536151,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10322.0,
+      "completions/max_terminated_length": 10322.0,
+      "completions/mean_length": 6645.3125,
+      "completions/mean_terminated_length": 6645.3125,
+      "completions/min_length": 1995.0,
+      "completions/min_terminated_length": 1995.0,
+      "entropy": 0.22339679207652807,
+      "epoch": 0.00057,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.7947802543640137,
+      "kl": 0.33472199738025665,
+      "learning_rate": 9.884387819009922e-05,
+      "loss": 0.0286,
+      "num_tokens": 7889241.0,
+      "reward": 0.3272937536239624,
+      "reward_std": 0.7356898784637451,
+      "rewards/rollout_eval_reward_func/mean": 0.38528963923454285,
+      "rewards/rollout_eval_reward_func/std": 0.3158987760543823,
+      "rewards/rollout_reward_func/mean": 0.3272937536239624,
+      "rewards/rollout_reward_func/std": 0.7287615537643433,
+      "sampling/importance_sampling_ratio/max": 1.519856333732605,
+      "sampling/importance_sampling_ratio/mean": 1.0008394718170166,
+      "sampling/importance_sampling_ratio/min": 0.6888355612754822,
+      "sampling/sampling_logp_difference/max": 0.41861581802368164,
+      "sampling/sampling_logp_difference/mean": 0.01188460923731327,
+      "step": 57,
+      "step_time": 83.6079965079989
+    },
+    {
+      "clip_ratio/high_max": 0.02337649872060865,
+      "clip_ratio/high_mean": 0.012729916197713464,
+      "clip_ratio/low_mean": 0.03550771565642208,
+      "clip_ratio/low_min": 0.013886852888390422,
+      "clip_ratio/region_mean": 0.048237632028758526,
+      "entropy": 0.23247116059064865,
+      "epoch": 0.00058,
+      "grad_norm": 0.6895915269851685,
+      "kl": 0.30278117302805185,
+      "learning_rate": 9.873178929870695e-05,
+      "loss": 0.0178,
+      "step": 58,
+      "step_time": 29.01562165299947
+    },
+    {
+      "clip_ratio/high_max": 0.006458333344198763,
+      "clip_ratio/high_mean": 0.00375000003259629,
+      "clip_ratio/low_mean": 0.0010416667209938169,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.004791666753590107,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10121.0,
+      "completions/max_terminated_length": 10121.0,
+      "completions/mean_length": 7354.9375,
+      "completions/mean_terminated_length": 7354.9375,
+      "completions/min_length": 1114.0,
+      "completions/min_terminated_length": 1114.0,
+      "entropy": 0.2855970785021782,
+      "epoch": 0.00059,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.1581138372421265,
+      "kl": 0.30998079385608435,
+      "learning_rate": 9.86146083569188e-05,
+      "loss": -0.077,
+      "num_tokens": 8152533.0,
+      "reward": 0.12930195033550262,
+      "reward_std": 0.6660091876983643,
+      "rewards/rollout_eval_reward_func/mean": 0.33841466903686523,
+      "rewards/rollout_eval_reward_func/std": 0.3268774151802063,
+      "rewards/rollout_reward_func/mean": 0.12930195033550262,
+      "rewards/rollout_reward_func/std": 0.7711123824119568,
+      "sampling/importance_sampling_ratio/max": 1.4381568431854248,
+      "sampling/importance_sampling_ratio/mean": 0.9980136156082153,
+      "sampling/importance_sampling_ratio/min": 0.7020198106765747,
+      "sampling/sampling_logp_difference/max": 0.36336231231689453,
+      "sampling/sampling_logp_difference/mean": 0.016959059983491898,
+      "step": 59,
+      "step_time": 87.87077508199764
+    },
+    {
+      "clip_ratio/high_max": 0.048061754438094795,
+      "clip_ratio/high_mean": 0.031483913655392826,
+      "clip_ratio/low_mean": 0.04418836906552315,
+      "clip_ratio/low_min": 0.007291666814126074,
+      "clip_ratio/region_mean": 0.07567228260450065,
+      "entropy": 0.26963882334530354,
+      "epoch": 0.0006,
+      "grad_norm": 1.0022964477539062,
+      "kl": 0.30027929320931435,
+      "learning_rate": 9.84923518334567e-05,
+      "loss": -0.0828,
+      "step": 60,
+      "step_time": 28.71259851099967
+    },
+    {
+      "clip_ratio/high_max": 0.01005121401976794,
+      "clip_ratio/high_mean": 0.005546440428588539,
+      "clip_ratio/low_mean": 0.0020026409183628857,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.007549081346951425,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10379.0,
+      "completions/max_terminated_length": 10379.0,
+      "completions/mean_length": 6955.875,
+      "completions/mean_terminated_length": 6955.875,
+      "completions/min_length": 2081.0,
+      "completions/min_terminated_length": 2081.0,
+      "entropy": 0.23451983137056231,
+      "epoch": 0.00061,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.1702102422714233,
+      "kl": 0.28609952982515097,
+      "learning_rate": 9.83650369103696e-05,
+      "loss": 0.0631,
+      "num_tokens": 8403186.0,
+      "reward": 0.3940112888813019,
+      "reward_std": 0.67255699634552,
+      "rewards/rollout_eval_reward_func/mean": 0.3715701103210449,
+      "rewards/rollout_eval_reward_func/std": 0.3261474072933197,
+      "rewards/rollout_reward_func/mean": 0.3940112888813019,
+      "rewards/rollout_reward_func/std": 0.6762000322341919,
+      "sampling/importance_sampling_ratio/max": 1.3093942403793335,
+      "sampling/importance_sampling_ratio/mean": 1.0008020401000977,
+      "sampling/importance_sampling_ratio/min": 0.5961512923240662,
+      "sampling/sampling_logp_difference/max": 0.5172607898712158,
+      "sampling/sampling_logp_difference/mean": 0.014042183756828308,
+      "step": 61,
+      "step_time": 86.51144317899707
+    },
+    {
+      "clip_ratio/high_max": 0.051156656933017075,
+      "clip_ratio/high_mean": 0.03779221937293187,
+      "clip_ratio/low_mean": 0.05211732583120465,
+      "clip_ratio/low_min": 0.024354460649192333,
+      "clip_ratio/region_mean": 0.08990954549517483,
+      "entropy": 0.21548824943602085,
+      "epoch": 0.00062,
+      "grad_norm": 1.1680642366409302,
+      "kl": 0.5453370595350862,
+      "learning_rate": 9.823268148061883e-05,
+      "loss": 0.0666,
+      "step": 62,
+      "step_time": 28.28093677799916
+    },
+    {
+      "clip_ratio/high_max": 0.009642903693020344,
+      "clip_ratio/high_mean": 0.004821451846510172,
+      "clip_ratio/low_mean": 0.0010416667209938169,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.005863118567503989,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9611.0,
+      "completions/max_terminated_length": 9611.0,
+      "completions/mean_length": 5474.5,
+      "completions/mean_terminated_length": 5474.5,
+      "completions/min_length": 1264.0,
+      "completions/min_terminated_length": 1264.0,
+      "entropy": 0.20994199626147747,
+      "epoch": 0.00063,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 1.5162609815597534,
+      "kl": 0.3376044826582074,
+      "learning_rate": 9.809530414556335e-05,
+      "loss": 0.1386,
+      "num_tokens": 8606212.0,
+      "reward": 0.6158473491668701,
+      "reward_std": 0.705132782459259,
+      "rewards/rollout_eval_reward_func/mean": 0.5119410753250122,
+      "rewards/rollout_eval_reward_func/std": 0.2654803693294525,
+      "rewards/rollout_reward_func/mean": 0.6158473491668701,
+      "rewards/rollout_reward_func/std": 0.6767383813858032,
+      "sampling/importance_sampling_ratio/max": 1.9123412370681763,
+      "sampling/importance_sampling_ratio/mean": 0.9994137287139893,
+      "sampling/importance_sampling_ratio/min": 0.6006231904029846,
+      "sampling/sampling_logp_difference/max": 0.6483283042907715,
+      "sampling/sampling_logp_difference/mean": 0.015111252665519714,
+      "step": 63,
+      "step_time": 74.64896667399807
+    },
+    {
+      "clip_ratio/high_max": 0.05132549628615379,
+      "clip_ratio/high_mean": 0.030718339723534882,
+      "clip_ratio/low_mean": 0.028882576967589557,
+      "clip_ratio/low_min": 0.0031250000465661287,
+      "clip_ratio/region_mean": 0.05960091657470912,
+      "entropy": 0.20065013086423278,
+      "epoch": 0.00064,
+      "grad_norm": 1.244667649269104,
+      "kl": 0.453593029640615,
+      "learning_rate": 9.79529242123455e-05,
+      "loss": 0.1234,
+      "step": 64,
+      "step_time": 24.8986287849948
+    },
+    {
+      "clip_ratio/high_max": 0.0077537596225738525,
+      "clip_ratio/high_mean": 0.0038768798112869263,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.004397713171783835,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10299.0,
+      "completions/max_terminated_length": 10299.0,
+      "completions/mean_length": 6500.03125,
+      "completions/mean_terminated_length": 6500.03125,
+      "completions/min_length": 1712.0,
+      "completions/min_terminated_length": 1712.0,
+      "entropy": 0.14482268318533897,
+      "epoch": 0.00065,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.9947667121887207,
+      "kl": 0.2627185983583331,
+      "learning_rate": 9.780556169117757e-05,
+      "loss": 0.0665,
+      "num_tokens": 8841902.0,
+      "reward": 0.6678237915039062,
+      "reward_std": 0.5866862535476685,
+      "rewards/rollout_eval_reward_func/mean": 0.5013973712921143,
+      "rewards/rollout_eval_reward_func/std": 0.27832266688346863,
+      "rewards/rollout_reward_func/mean": 0.6678237915039062,
+      "rewards/rollout_reward_func/std": 0.5921808481216431,
+      "sampling/importance_sampling_ratio/max": 1.4597694873809814,
+      "sampling/importance_sampling_ratio/mean": 0.99915611743927,
+      "sampling/importance_sampling_ratio/min": 0.27695003151893616,
+      "sampling/sampling_logp_difference/max": 1.2839181423187256,
+      "sampling/sampling_logp_difference/mean": 0.010844534263014793,
+      "step": 65,
+      "step_time": 80.86000475000401
+    },
+    {
+      "clip_ratio/high_max": 0.025044884881936014,
+      "clip_ratio/high_mean": 0.014345359115395695,
+      "clip_ratio/low_mean": 0.02013494382845238,
+      "clip_ratio/low_min": 0.0020833334419876337,
+      "clip_ratio/region_mean": 0.03448030271101743,
+      "entropy": 0.13131517032161355,
+      "epoch": 0.00066,
+      "grad_norm": 0.4750834107398987,
+      "kl": 0.34219094878062606,
+      "learning_rate": 9.765323729252955e-05,
+      "loss": 0.0561,
+      "step": 66,
+      "step_time": 28.661124781996477
+    },
+    {
+      "clip_ratio/high_max": 0.009476827806793153,
+      "clip_ratio/high_mean": 0.0062825315981172025,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0062825315981172025,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10269.0,
+      "completions/max_terminated_length": 10269.0,
+      "completions/mean_length": 6501.5,
+      "completions/mean_terminated_length": 6501.5,
+      "completions/min_length": 724.0,
+      "completions/min_terminated_length": 724.0,
+      "entropy": 0.14845013478770852,
+      "epoch": 0.00067,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.9048762917518616,
+      "kl": 0.3337597157806158,
+      "learning_rate": 9.749597242421838e-05,
+      "loss": 0.0677,
+      "num_tokens": 9077833.0,
+      "reward": 0.6164548397064209,
+      "reward_std": 0.5683386325836182,
+      "rewards/rollout_eval_reward_func/mean": 0.4744664430618286,
+      "rewards/rollout_eval_reward_func/std": 0.29613611102104187,
+      "rewards/rollout_reward_func/mean": 0.6164548397064209,
+      "rewards/rollout_reward_func/std": 0.6236394643783569,
+      "sampling/importance_sampling_ratio/max": 1.9927904605865479,
+      "sampling/importance_sampling_ratio/mean": 1.0013047456741333,
+      "sampling/importance_sampling_ratio/min": 0.5228504538536072,
+      "sampling/sampling_logp_difference/max": 0.6895358562469482,
+      "sampling/sampling_logp_difference/mean": 0.011342051438987255,
+      "step": 67,
+      "step_time": 79.70687562199964
+    },
+    {
+      "clip_ratio/high_max": 0.0376884457655251,
+      "clip_ratio/high_mean": 0.026128767582122236,
+      "clip_ratio/low_mean": 0.026416301843710244,
+      "clip_ratio/low_min": 0.007291666814126074,
+      "clip_ratio/region_mean": 0.0525450695422478,
+      "entropy": 0.15412914380431175,
+      "epoch": 0.00068,
+      "grad_norm": 0.8491650223731995,
+      "kl": 0.3899666126817465,
+      "learning_rate": 9.733378918839942e-05,
+      "loss": 0.0638,
+      "step": 68,
+      "step_time": 27.40538086699962
+    },
+    {
+      "clip_ratio/high_max": 0.006514550419524312,
+      "clip_ratio/high_mean": 0.003257275209762156,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.003257275209762156,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10202.0,
+      "completions/max_terminated_length": 10202.0,
+      "completions/mean_length": 5860.65625,
+      "completions/mean_terminated_length": 5860.65625,
+      "completions/min_length": 540.0,
+      "completions/min_terminated_length": 540.0,
+      "entropy": 0.16269859950989485,
+      "epoch": 0.00069,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.9143983721733093,
+      "kl": 0.47777214366942644,
+      "learning_rate": 9.716671037846007e-05,
+      "loss": 0.1152,
+      "num_tokens": 9293397.0,
+      "reward": 0.6956244707107544,
+      "reward_std": 0.5506021976470947,
+      "rewards/rollout_eval_reward_func/mean": 0.5125762224197388,
+      "rewards/rollout_eval_reward_func/std": 0.2857610881328583,
+      "rewards/rollout_reward_func/mean": 0.6956244707107544,
+      "rewards/rollout_reward_func/std": 0.5588130354881287,
+      "sampling/importance_sampling_ratio/max": 1.4407436847686768,
+      "sampling/importance_sampling_ratio/mean": 1.0004699230194092,
+      "sampling/importance_sampling_ratio/min": 0.5672728419303894,
+      "sampling/sampling_logp_difference/max": 0.5669147968292236,
+      "sampling/sampling_logp_difference/mean": 0.010705020278692245,
+      "step": 69,
+      "step_time": 77.93740563500614
+    },
+    {
+      "clip_ratio/high_max": 0.04634982522111386,
+      "clip_ratio/high_mean": 0.029493737209122628,
+      "clip_ratio/low_mean": 0.01730769290588796,
+      "clip_ratio/low_min": 0.004166666767559946,
+      "clip_ratio/region_mean": 0.046801429823972285,
+      "entropy": 0.1784980888478458,
+      "epoch": 0.0007,
+      "grad_norm": 0.7063129544258118,
+      "kl": 0.3514184970408678,
+      "learning_rate": 9.699475947581644e-05,
+      "loss": 0.1049,
+      "step": 70,
+      "step_time": 27.06573885999751
+    },
+    {
+      "clip_ratio/high_max": 0.0018382353009656072,
+      "clip_ratio/high_mean": 0.0009191176504828036,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0009191176504828036,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10593.0,
+      "completions/max_terminated_length": 10593.0,
+      "completions/mean_length": 6225.28125,
+      "completions/mean_terminated_length": 6225.28125,
+      "completions/min_length": 1544.0,
+      "completions/min_terminated_length": 1544.0,
+      "entropy": 0.17604797054082155,
+      "epoch": 0.00071,
+      "frac_reward_zero_std": 0.25,
+      "grad_norm": 0.7393732070922852,
+      "kl": 0.20170354284346104,
+      "learning_rate": 9.681796064661319e-05,
+      "loss": 0.0413,
+      "num_tokens": 9520372.0,
+      "reward": 0.8103519678115845,
+      "reward_std": 0.3980957269668579,
+      "rewards/rollout_eval_reward_func/mean": 0.5907012224197388,
+      "rewards/rollout_eval_reward_func/std": 0.20860876142978668,
+      "rewards/rollout_reward_func/mean": 0.8103519678115845,
+      "rewards/rollout_reward_func/std": 0.49828964471817017,
+      "sampling/importance_sampling_ratio/max": 1.5257266759872437,
+      "sampling/importance_sampling_ratio/mean": 0.9991195201873779,
+      "sampling/importance_sampling_ratio/min": 0.6470949649810791,
+      "sampling/sampling_logp_difference/max": 0.43526220321655273,
+      "sampling/sampling_logp_difference/mean": 0.010150602087378502,
+      "step": 71,
+      "step_time": 79.67722541299918
+    },
+    {
+      "clip_ratio/high_max": 0.01454339677002281,
+      "clip_ratio/high_mean": 0.008729609136935323,
+      "clip_ratio/low_mean": 0.009114583488553762,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.01784419280011207,
+      "entropy": 0.18162205442786217,
+      "epoch": 0.00072,
+      "grad_norm": 0.44046881794929504,
+      "kl": 0.20182663016021252,
+      "learning_rate": 9.663633873832725e-05,
+      "loss": 0.0328,
+      "step": 72,
+      "step_time": 28.538212690000364
+    },
+    {
+      "clip_ratio/high_max": 0.004232634324580431,
+      "clip_ratio/high_mean": 0.0021163171622902155,
+      "clip_ratio/low_mean": 0.0005208333604969084,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.002637150522787124,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 10142.0,
+      "completions/max_terminated_length": 10142.0,
+      "completions/mean_length": 6840.59375,
+      "completions/mean_terminated_length": 6840.59375,
+      "completions/min_length": 1013.0,
+      "completions/min_terminated_length": 1013.0,
+      "entropy": 0.20604060776531696,
+      "epoch": 0.00073,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.6912176609039307,
+      "kl": 0.26183000626042485,
+      "learning_rate": 9.644991927627566e-05,
+      "loss": -0.0088,
+      "num_tokens": 9767000.0,
+      "reward": 0.7756590247154236,
+      "reward_std": 0.5361872911453247,
+      "rewards/rollout_eval_reward_func/mean": 0.5909552574157715,
+      "rewards/rollout_eval_reward_func/std": 0.2465948760509491,
+      "rewards/rollout_reward_func/mean": 0.7756590247154236,
+      "rewards/rollout_reward_func/std": 0.5321318507194519,
+      "sampling/importance_sampling_ratio/max": 1.2645851373672485,
+      "sampling/importance_sampling_ratio/mean": 1.0009121894836426,
+      "sampling/importance_sampling_ratio/min": 0.6386132836341858,
+      "sampling/sampling_logp_difference/max": 0.4484562873840332,
+      "sampling/sampling_logp_difference/mean": 0.010102368891239166,
+      "step": 73,
+      "step_time": 82.14820753000458
+    },
+    {
+      "clip_ratio/high_max": 0.02557993505615741,
+      "clip_ratio/high_mean": 0.01748599053826183,
+      "clip_ratio/low_mean": 0.009895833674818277,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.027381824154872447,
+      "entropy": 0.22169003915041685,
+      "epoch": 0.00074,
+      "grad_norm": 0.42521047592163086,
+      "kl": 0.23322301171720028,
+      "learning_rate": 9.625872846002834e-05,
+      "loss": -0.0155,
+      "step": 74,
+      "step_time": 28.134478513999056
+    },
+    {
+      "clip_ratio/high_max": 0.008986742584966123,
+      "clip_ratio/high_mean": 0.005014204594772309,
+      "clip_ratio/low_mean": 0.002018229220993817,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.007032433815766126,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 9857.0,
+      "completions/max_terminated_length": 9857.0,
+      "completions/mean_length": 7195.375,
+      "completions/mean_terminated_length": 7195.375,
+      "completions/min_length": 768.0,
+      "completions/min_terminated_length": 768.0,
+      "entropy": 0.27833056077361107,
+      "epoch": 0.00075,
+      "frac_reward_zero_std": 0.0,
+      "grad_norm": 0.8185970187187195,
+      "kl": 0.24367811996489763,
+      "learning_rate": 9.606279315972582e-05,
+      "loss": -0.1492,
+      "num_tokens": 10025024.0,
+      "reward": 0.2978099584579468,
+      "reward_std": 0.6597497463226318,
+      "rewards/rollout_eval_reward_func/mean": 0.32901421189308167,
+      "rewards/rollout_eval_reward_func/std": 0.3157320022583008,
+      "rewards/rollout_reward_func/mean": 0.2978099584579468,
+      "rewards/rollout_reward_func/std": 0.690675675868988,
+      "sampling/importance_sampling_ratio/max": 1.4156017303466797,
+      "sampling/importance_sampling_ratio/mean": 1.0000808238983154,
+      "sampling/importance_sampling_ratio/min": 0.6558278799057007,
+      "sampling/sampling_logp_difference/max": 0.4218568801879883,
+      "sampling/sampling_logp_difference/mean": 0.013327672146260738,
+      "step": 75,
+      "step_time": 88.96669697499601
+    },
+    {
+      "epoch": 0.00075,
+      "eval_clip_ratio/high_max": 0.0,
+      "eval_clip_ratio/high_mean": 0.0,
+      "eval_clip_ratio/low_mean": 0.0,
+      "eval_clip_ratio/low_min": 0.0,
+      "eval_clip_ratio/region_mean": 0.0,
+      "eval_completions/clipped_ratio": 0.0,
+      "eval_completions/max_length": 9194.0,
+      "eval_completions/max_terminated_length": 9194.0,
+      "eval_completions/mean_length": 7026.0375,
+      "eval_completions/mean_terminated_length": 7026.0375,
+      "eval_completions/min_length": 4333.95,
+      "eval_completions/min_terminated_length": 4333.95,
+      "eval_entropy": 0.3085056647658348,
+      "eval_frac_reward_zero_std": 1.0,
+      "eval_kl": 0.22236853390932082,
+      "eval_loss": 0.0002063037100015208,
+      "eval_num_tokens": 10025024.0,
+      "eval_reward": 0.35444250535219907,
+      "eval_reward_std": 0.0,
+      "eval_rewards/rollout_eval_reward_func/mean": 0.3484247986227274,
+      "eval_rewards/rollout_eval_reward_func/std": 0.26531881298869847,
+      "eval_rewards/rollout_reward_func/mean": 0.35444250535219907,
+      "eval_rewards/rollout_reward_func/std": 0.5791118375957012,
+      "eval_runtime": 161.4965,
+      "eval_samples_per_second": 0.062,
+      "eval_sampling/importance_sampling_ratio/max": 1.1964155852794647,
+      "eval_sampling/importance_sampling_ratio/mean": 1.0003154128789902,
+      "eval_sampling/importance_sampling_ratio/min": 0.7968822807073593,
+      "eval_sampling/sampling_logp_difference/max": 0.2617991387844086,
+      "eval_sampling/sampling_logp_difference/mean": 0.01210988024249673,
+      "eval_steps_per_second": 0.019,
+      "step": 75
+    }
+  ],
+  "logging_steps": 1.0,
+  "max_steps": 300,
+  "num_input_tokens_seen": 10025024,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000..22a07b1
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:258bdc828cf0aea84d4105d1bc4961ef5cad4760f620dfe20a527a870df319c1
+size 8145