commit ce5246e3fdb32287b75797855ab47b0c0d083ce6
Author: ModelHub XC <noreply@modelhub.org.cn>
Date:   Tue Jun 16 07:50:16 2026 +0800

    初始化项目，由ModelHub XC社区提供模型
    
    Model: varshak1/open_reward_agent_sft_lf
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..52373fe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5ef6857
--- /dev/null
+++ b/README.md
@@ -0,0 +1,61 @@
+---
+library_name: transformers
+license: other
+base_model: Qwen/Qwen3-8B
+tags:
+- llama-factory
+- full
+- generated_from_trainer
+model-index:
+- name: open_reward_agent_sft_lf
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# open_reward_agent_sft_lf
+
+This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the open_reward_agent_sft_lf dataset.
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 8e-06
+- train_batch_size: 1
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 8
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 64
+- total_eval_batch_size: 64
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 0.05 (fractional value: a warmup ratio of total training steps, not an absolute step count)
+- num_epochs: 1.0
+
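+### Training results
+
+Final training loss: 0.9932 after 205 steps (1 epoch, about 1798 s of training runtime); no evaluation metrics were logged.
+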
+### Framework versions
+
+- Transformers 5.2.0
+- Pytorch 2.6.0+cu124
+- Datasets 4.0.0
+- Tokenizers 0.22.2
diff --git a/all_results.json b/all_results.json
new file mode 100644
index 0000000..1f3be63
--- /dev/null
+++ b/all_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 1.0,
+    "total_flos": 2.640143661303595e+18,
+    "train_loss": 0.9932206619076612,
+    "train_runtime": 1797.9362,
+    "train_samples_per_second": 7.286,
+    "train_steps_per_second": 0.114
+}
\ No newline at end of file
diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000..01be9b3
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,89 @@
+{%- if tools %}{#- Tools supplied: open a system turn holding the optional system prompt followed by the tool signatures. -#}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}{#- A leading system message is merged into this same system turn. -#}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}{#- One JSON-serialized tool definition per line inside the tools block. -#}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}{#- No tools: emit the leading system message, if any, as its own turn. -#}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}{#- Reverse scan below records the index of the last user message that is not a wrapped tool response; the default is the final message index. -#}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}{#- Main pass: render every message in order. -#}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}{#- Non-string content is rendered as an empty string. -#}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}{#- User turns, and system turns other than the first message, are emitted as plain turns. -#}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}{#- Prefer an explicit reasoning_content field; otherwise split the reasoning out of inline think tags in the content. -#}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}{#- Assistant turns after the last user query get a think block (the final message always, others only when reasoning is non-empty); earlier turns render content only. -#}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}{#- Append each tool call as a JSON object wrapped in tool_call tags. -#}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}{#- Accept both the nested function wrapper and a flat name/arguments object. -#}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}{#- String arguments are inserted as-is; anything else is serialized with tojson. -#}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}{#- Tool results are rendered inside a user turn; consecutive tool messages share a single turn. -#}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}{#- Optionally open an assistant turn for generation. -#}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}{#- With enable_thinking explicitly false, pre-fill an empty think block. -#}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..a8e7a96
--- /dev/null
+++ b/config.json
@@ -0,0 +1,71 @@
+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "pad_token_id": 151643,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "transformers_version": "5.2.0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..1701c94
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,12 @@
+{
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.6,
+  "top_k": 20,
+  "top_p": 0.95,
+  "transformers_version": "5.2.0"
+}
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000..e579a8c
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6627998b45798f599014942c9bf5bf8977efdc0468ef27b94fe3597f50625f1f
+size 16381517208
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..c7afbed
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
+size 11422650
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..145e2c7
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,30 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}
diff --git a/train_results.json b/train_results.json
new file mode 100644
index 0000000..1f3be63
--- /dev/null
+++ b/train_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 1.0,
+    "total_flos": 2.640143661303595e+18,
+    "train_loss": 0.9932206619076612,
+    "train_runtime": 1797.9362,
+    "train_samples_per_second": 7.286,
+    "train_steps_per_second": 0.114
+}
\ No newline at end of file
diff --git a/trainer_log.jsonl b/trainer_log.jsonl
new file mode 100644
index 0000000..f3d130a
--- /dev/null
+++ b/trainer_log.jsonl
@@ -0,0 +1,21 @@
+{"current_steps": 10, "total_steps": 205, "loss": 1.6103919982910155, "lr": 6.545454545454546e-06, "epoch": 0.04884004884004884, "percentage": 4.88, "elapsed_time": "0:01:30", "remaining_time": "0:29:33"}
+{"current_steps": 20, "total_steps": 205, "loss": 1.1710912704467773, "lr": 7.9664804049057e-06, "epoch": 0.09768009768009768, "percentage": 9.76, "elapsed_time": "0:02:55", "remaining_time": "0:27:04"}
+{"current_steps": 30, "total_steps": 205, "loss": 1.0446645736694335, "lr": 7.831269296751948e-06, "epoch": 0.14652014652014653, "percentage": 14.63, "elapsed_time": "0:04:19", "remaining_time": "0:25:14"}
+{"current_steps": 40, "total_steps": 205, "loss": 0.9899624824523926, "lr": 7.595806964341581e-06, "epoch": 0.19536019536019536, "percentage": 19.51, "elapsed_time": "0:05:44", "remaining_time": "0:23:39"}
+{"current_steps": 50, "total_steps": 205, "loss": 0.9951982498168945, "lr": 7.266254652228843e-06, "epoch": 0.2442002442002442, "percentage": 24.39, "elapsed_time": "0:07:11", "remaining_time": "0:22:17"}
+{"current_steps": 60, "total_steps": 205, "loss": 0.9782312393188477, "lr": 6.851235618187317e-06, "epoch": 0.29304029304029305, "percentage": 29.27, "elapsed_time": "0:08:34", "remaining_time": "0:20:42"}
+{"current_steps": 70, "total_steps": 205, "loss": 0.9667717933654785, "lr": 6.36160949202369e-06, "epoch": 0.3418803418803419, "percentage": 34.15, "elapsed_time": "0:09:58", "remaining_time": "0:19:13"}
+{"current_steps": 80, "total_steps": 205, "loss": 0.9572884559631347, "lr": 5.810188116178156e-06, "epoch": 0.3907203907203907, "percentage": 39.02, "elapsed_time": "0:11:20", "remaining_time": "0:17:43"}
+{"current_steps": 90, "total_steps": 205, "loss": 0.9558270454406739, "lr": 5.211400303591802e-06, "epoch": 0.43956043956043955, "percentage": 43.9, "elapsed_time": "0:12:45", "remaining_time": "0:16:17"}
+{"current_steps": 100, "total_steps": 205, "loss": 0.9527605056762696, "lr": 4.580914284981961e-06, "epoch": 0.4884004884004884, "percentage": 48.78, "elapsed_time": "0:14:08", "remaining_time": "0:14:51"}
+{"current_steps": 110, "total_steps": 205, "loss": 0.9419396400451661, "lr": 3.935227724789994e-06, "epoch": 0.5372405372405372, "percentage": 53.66, "elapsed_time": "0:15:33", "remaining_time": "0:13:26"}
+{"current_steps": 120, "total_steps": 205, "loss": 0.9319709777832031, "lr": 3.2912360336831093e-06, "epoch": 0.5860805860805861, "percentage": 58.54, "elapsed_time": "0:16:57", "remaining_time": "0:12:00"}
+{"current_steps": 130, "total_steps": 205, "loss": 0.9522204399108887, "lr": 2.665790273396718e-06, "epoch": 0.6349206349206349, "percentage": 63.41, "elapsed_time": "0:18:20", "remaining_time": "0:10:34"}
+{"current_steps": 140, "total_steps": 205, "loss": 0.9412946701049805, "lr": 2.0752562220367795e-06, "epoch": 0.6837606837606838, "percentage": 68.29, "elapsed_time": "0:19:47", "remaining_time": "0:09:11"}
+{"current_steps": 150, "total_steps": 205, "loss": 0.9191699981689453, "lr": 1.5350861375962904e-06, "epoch": 0.7326007326007326, "percentage": 73.17, "elapsed_time": "0:21:11", "remaining_time": "0:07:46"}
+{"current_steps": 160, "total_steps": 205, "loss": 0.9190822601318359, "lr": 1.0594144251711994e-06, "epoch": 0.7814407814407814, "percentage": 78.05, "elapsed_time": "0:22:37", "remaining_time": "0:06:21"}
+{"current_steps": 170, "total_steps": 205, "loss": 0.9286371231079101, "lr": 6.606877878829161e-07, "epoch": 0.8302808302808303, "percentage": 82.93, "elapsed_time": "0:24:01", "remaining_time": "0:04:56"}
+{"current_steps": 180, "total_steps": 205, "loss": 0.9289794921875, "lr": 3.4933953919383984e-07, "epoch": 0.8791208791208791, "percentage": 87.8, "elapsed_time": "0:25:24", "remaining_time": "0:03:31"}
+{"current_steps": 190, "total_steps": 205, "loss": 0.919953727722168, "lr": 1.3351659874955546e-07, "epoch": 0.927960927960928, "percentage": 92.68, "elapsed_time": "0:26:51", "remaining_time": "0:02:07"}
+{"current_steps": 200, "total_steps": 205, "loss": 0.9213088989257813, "lr": 1.8866315333544213e-08, "epoch": 0.9768009768009768, "percentage": 97.56, "elapsed_time": "0:28:14", "remaining_time": "0:00:42"}
+{"current_steps": 205, "total_steps": 205, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:29:56", "remaining_time": "0:00:00"}
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000..5456ba7
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,183 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 205,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.04884004884004884,
+      "grad_norm": 2.951613426208496,
+      "learning_rate": 6.545454545454546e-06,
+      "loss": 1.6103919982910155,
+      "step": 10
+    },
+    {
+      "epoch": 0.09768009768009768,
+      "grad_norm": 1.4161419868469238,
+      "learning_rate": 7.9664804049057e-06,
+      "loss": 1.1710912704467773,
+      "step": 20
+    },
+    {
+      "epoch": 0.14652014652014653,
+      "grad_norm": 1.1890255212783813,
+      "learning_rate": 7.831269296751948e-06,
+      "loss": 1.0446645736694335,
+      "step": 30
+    },
+    {
+      "epoch": 0.19536019536019536,
+      "grad_norm": 1.1855751276016235,
+      "learning_rate": 7.595806964341581e-06,
+      "loss": 0.9899624824523926,
+      "step": 40
+    },
+    {
+      "epoch": 0.2442002442002442,
+      "grad_norm": 1.4533610343933105,
+      "learning_rate": 7.266254652228843e-06,
+      "loss": 0.9951982498168945,
+      "step": 50
+    },
+    {
+      "epoch": 0.29304029304029305,
+      "grad_norm": 1.264499545097351,
+      "learning_rate": 6.851235618187317e-06,
+      "loss": 0.9782312393188477,
+      "step": 60
+    },
+    {
+      "epoch": 0.3418803418803419,
+      "grad_norm": 1.3089553117752075,
+      "learning_rate": 6.36160949202369e-06,
+      "loss": 0.9667717933654785,
+      "step": 70
+    },
+    {
+      "epoch": 0.3907203907203907,
+      "grad_norm": 1.0916252136230469,
+      "learning_rate": 5.810188116178156e-06,
+      "loss": 0.9572884559631347,
+      "step": 80
+    },
+    {
+      "epoch": 0.43956043956043955,
+      "grad_norm": 1.2539805173873901,
+      "learning_rate": 5.211400303591802e-06,
+      "loss": 0.9558270454406739,
+      "step": 90
+    },
+    {
+      "epoch": 0.4884004884004884,
+      "grad_norm": 1.1267527341842651,
+      "learning_rate": 4.580914284981961e-06,
+      "loss": 0.9527605056762696,
+      "step": 100
+    },
+    {
+      "epoch": 0.5372405372405372,
+      "grad_norm": 1.0940070152282715,
+      "learning_rate": 3.935227724789994e-06,
+      "loss": 0.9419396400451661,
+      "step": 110
+    },
+    {
+      "epoch": 0.5860805860805861,
+      "grad_norm": 1.7783108949661255,
+      "learning_rate": 3.2912360336831093e-06,
+      "loss": 0.9319709777832031,
+      "step": 120
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 1.1148947477340698,
+      "learning_rate": 2.665790273396718e-06,
+      "loss": 0.9522204399108887,
+      "step": 130
+    },
+    {
+      "epoch": 0.6837606837606838,
+      "grad_norm": 1.0567108392715454,
+      "learning_rate": 2.0752562220367795e-06,
+      "loss": 0.9412946701049805,
+      "step": 140
+    },
+    {
+      "epoch": 0.7326007326007326,
+      "grad_norm": 1.0418899059295654,
+      "learning_rate": 1.5350861375962904e-06,
+      "loss": 0.9191699981689453,
+      "step": 150
+    },
+    {
+      "epoch": 0.7814407814407814,
+      "grad_norm": 0.9806021451950073,
+      "learning_rate": 1.0594144251711994e-06,
+      "loss": 0.9190822601318359,
+      "step": 160
+    },
+    {
+      "epoch": 0.8302808302808303,
+      "grad_norm": 1.0123064517974854,
+      "learning_rate": 6.606877878829161e-07,
+      "loss": 0.9286371231079101,
+      "step": 170
+    },
+    {
+      "epoch": 0.8791208791208791,
+      "grad_norm": 1.079588770866394,
+      "learning_rate": 3.4933953919383984e-07,
+      "loss": 0.9289794921875,
+      "step": 180
+    },
+    {
+      "epoch": 0.927960927960928,
+      "grad_norm": 1.2275781631469727,
+      "learning_rate": 1.3351659874955546e-07,
+      "loss": 0.919953727722168,
+      "step": 190
+    },
+    {
+      "epoch": 0.9768009768009768,
+      "grad_norm": 1.0596216917037964,
+      "learning_rate": 1.8866315333544213e-08,
+      "loss": 0.9213088989257813,
+      "step": 200
+    },
+    {
+      "epoch": 1.0,
+      "step": 205,
+      "total_flos": 2.640143661303595e+18,
+      "train_loss": 0.9932206619076612,
+      "train_runtime": 1797.9362,
+      "train_samples_per_second": 7.286,
+      "train_steps_per_second": 0.114
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 205,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.640143661303595e+18,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000..c9a39a8
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df97d1b346179757db27ce8b703af272dc95cec4c6ac81113071fb45d7156285
+size 6776
diff --git a/training_loss.png b/training_loss.png
new file mode 100644
index 0000000..4c41a3d
Binary files /dev/null and b/training_loss.png differ