初始化项目，由ModelHub XC社区提供模型

Model: hoornet/nives-fg-270m-v1 Source: Original Platform
2026-05-22 17:27:16 +08:00
commit 021c63722e
24 changed files with 2257 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,39 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-897/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+nives-fg-270m-v1-F16.gguf filter=lfs diff=lfs merge=lfs -text
+nives-fg-270m-v1-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,58 @@
+---
+base_model: google/functiongemma-270m-it
+library_name: transformers
+model_name: fg270m-nives-ft-v1
+tags:
+- generated_from_trainer
+- trl
+- sft
+license: gemma
+---
+
+# Model Card for fg270m-nives-ft-v1
+
+This model is a fine-tuned version of [google/functiongemma-270m-it](https://huggingface.co/google/functiongemma-270m-it).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+
+## Quick start
+
+```python
+from transformers import pipeline
+
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="hoornet/nives-fg-270m-v1", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+
+## Training procedure
+
+ 
+
+
+
+This model was trained with SFT.
+
+### Framework versions
+
+- TRL: 1.3.0
+- Transformers: 5.7.0
+- Pytorch: 2.4.1+cu124
+- Datasets: 4.8.5
+- Tokenizers: 0.22.2
+
+## Citations
+
+
+
+Cite TRL as:
+    
+```bibtex
+@software{vonwerra2020trl,
+  title   = {{TRL: Transformers Reinforcement Learning}},
+  author  = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+  license = {Apache-2.0},
+  url     = {https://github.com/huggingface/trl},
+  year    = {2020}
+}
+```
--- a/chat_template.jinja
+++ b/chat_template.jinja
@@ -0,0 +1,279 @@
+{#- format_parameters(properties, required)
+    Renders each entry of the `properties` mapping, in key order (dictsort), as
+    key:{description:<escape>..<escape>[,extra fields],type:<escape>TYPE<escape>}
+    and joins the entries with commas. Extra fields depend on the upper-cased type:
+    `enum` for STRING, nested `properties`/`required` for OBJECT, `items` for ARRAY.
+    NOTE(review): the `required` argument is never read in this macro body; nested
+    `required` lists are taken from each value instead. -#}
+{%- macro format_parameters(properties, required) -%}
+    {#- Entries whose key is one of these names are skipped by the loop below. -#}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {#- found_first records whether an entry was already written, i.e. whether a comma is due. -#}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {%- if key not in standard_keys -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{- key }}:{description:<escape>{{ value['description'] }}<escape>
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    ,enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'OBJECT' -%}
+                ,properties:{
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                {%- elif value is mapping -%}
+                    {#- No mapping under 'properties': the value itself is walked as the property map. -#}
+                    {{- format_parameters(value, value['required'] | default([])) -}}
+                {%- endif -%}
+                }
+                {%- if value['required'] -%}
+                    ,required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <escape>{{- item -}}<escape>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    ,items:{
+                    {#- Each non-null key of `items` is written in key order; properties, required and type get dedicated formatting. -#}
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <escape>{{- req_item -}}<escape>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {#- The type is written last and is followed by the brace that closes this entry. -#}
+            ,type:<escape>{{ value['type'] | upper }}<escape>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{#- format_function_declaration(tool_data)
+    Renders one tool as declaration:NAME followed by a braced body holding the
+    <escape>-wrapped description and, when tool_data['function']['parameters'] is truthy,
+    a parameters section with properties, required and type (in that order).
+    The call site in this template applies `| trim` to the result. -#}
+{% macro format_function_declaration(tool_data) -%}
+declaration:{{- tool_data['function']['name'] -}}
+{description:<escape>{{- tool_data['function']['description'] -}}<escape>
+{%- set params = tool_data['function']['parameters'] -%}
+{%- if params -%}
+    ,parameters:{
+    {%- if params['properties'] -%}
+        properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+    {%- endif -%}
+    {%- if params['required'] -%}
+        required:[
+        {%- for item in params['required'] -%}
+            <escape>{{- item -}}<escape>
+            {{- ',' if not loop.last -}}
+        {%- endfor -%}
+        ],
+    {%- endif -%}
+    {#- NOTE(review): the brace closing the parameters section is emitted only inside this branch, i.e. only when params['type'] is truthy. -#}
+    {%- if params['type'] -%}
+        type:<escape>{{- params['type'] | upper -}}<escape>}
+    {%- endif -%}
+{%- endif -%}
+{#- Closes the declaration body that was opened before the description. -#}
+}
+{%- endmacro -%}
+{#- format_argument(argument, escape_keys=True)
+    Serialises a value for the function-calling syntax used by this template:
+    strings are wrapped in <escape> markers, booleans become true/false, mappings
+    become a braced, comma-separated key:value list ordered by key (dictsort),
+    other sequences become a bracketed, comma-separated list, and anything else is
+    printed as-is. Mapping keys are wrapped in <escape> markers only when
+    escape_keys is true; the flag is passed down to nested values. -#}
+{% macro format_argument(argument, escape_keys=True) -%}
+{%- if argument is string -%}
+    {{- '<escape>' + argument + '<escape>' -}}
+{%- elif argument is boolean -%}
+    {{- 'true' if argument else 'false' -}}
+{%- elif argument is mapping -%}
+    {{- '{' -}}
+    {%- for entry_key, entry_value in argument | dictsort -%}
+        {#- A comma goes in front of every pair except the first one. -#}
+        {%- if not loop.first %},{% endif -%}
+        {{- ('<escape>' + entry_key + '<escape>') if escape_keys else entry_key -}}
+        :{{- format_argument(entry_value, escape_keys=escape_keys) -}}
+    {%- endfor -%}
+    {{- '}' -}}
+{%- elif argument is sequence -%}
+    {{- '[' -}}
+    {%- for element in argument -%}
+        {%- if not loop.first %},{% endif -%}
+        {{- format_argument(element, escape_keys=escape_keys) -}}
+    {%- endfor -%}
+    {{- ']' -}}
+{%- else -%}
+    {{- argument -}}
+{%- endif -%}
+{%- endmacro -%}
+{#- Template output starts here: the BOS token, then an optional developer turn. -#}
+{{ bos_token }}
+{%- set ns = namespace(prev_message_type=None) -%}
+{#- Tool Declarations: a developer turn is written when tools are given or the first message has role system/developer. -#}
+{%- set loop_messages = messages -%}
+{%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
+    {{- '<start_of_turn>developer\n' -}}
+    {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
+        {#- String content is written trimmed; for list content only the items of type 'text' are written. -#}
+        {%- if messages[0]['content'] is string -%}
+            {{- messages[0]['content'] | trim -}}
+        {%- elif messages[0]['content'] is sequence -%}
+            {%- for item in messages[0]['content'] -%}
+                {%- if item['type'] == 'text' -%}
+                    {{- item['text'] | trim -}}
+                {%- endif -%}
+            {%- endfor -%}
+        {%- endif -%}
+        {#- The first message has been consumed here, so the message loop starts from the second one. -#}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+    {%- if tools -%}
+        {%- for tool in tools %}
+            {{- '<start_function_declaration>' -}}
+            {{- format_function_declaration(tool) | trim }}
+            {{- '<end_function_declaration>' -}}
+        {%- endfor %}
+    {%- endif -%}
+    {{- '<end_of_turn>\n' }}
+{%- endif %}
+{#- Loop through messages. ns.prev_message_type carries state between iterations;
+    it is set to None, 'content', 'tool_call' or 'tool_response' below. -#}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {#- Rename "assistant" to "model". -#}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {%- if role != 'tool' -%}
+        {#- No turn header is written for a message that directly follows a tool response. -#}
+        {%- if ns.prev_message_type != 'tool_response' -%}
+            {{- '<start_of_turn>' + role + '\n' }}
+        {%- endif -%}
+        {%- set ns.prev_message_type = None -%}
+        {%- if 'content' in message and message['content'] is not none -%}
+            {%- if message['content'] is string -%}
+                {{ message['content'] | trim }}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'image' -%}
+                        {{ '<start_of_image>' }}
+                    {%- elif item['type'] == 'text' -%}
+                        {{ item['text'] | trim }}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{ raise_exception("Invalid content type in user/assistant message") }}
+            {%- endif -%}
+            {%- set ns.prev_message_type = 'content' -%}
+        {%- endif -%}
+        {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
+            {#- Tool Calls: each call is written as call:NAME followed by braced, key-sorted arguments. -#}
+            {%- for tool_call in message['tool_calls'] -%}
+                {% set function = tool_call['function'] %}
+                {{-  '<start_function_call>call:' + function['name'] + '{' -}}
+                {%- if 'arguments' in function -%}
+                    {%- if function['arguments'] is mapping -%}
+                        {#- NOTE(review): this rebinds the name `ns` inside the tool_call loop; presumably it relies on
+                            loop-local scoping so the outer ns.prev_message_type is unaffected. Confirm on the target template engine. -#}
+                        {%- set ns = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns.found_first %},{% endif -%}
+                            {%- set ns.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {# This handles string-JSON, just in case #}
+                    {{ function['arguments'] }}
+                    {%- endif %}
+                {%- endif -%}
+                {{- '}<end_function_call>' -}}
+            {%- endfor -%}
+            {#- The tool_call loop has ended, so `loop` is the message loop again: the response opener is only written for the final message. -#}
+            {%- if loop.last -%}
+                {{ '<start_function_response>' }}
+            {%- endif -%}
+            {%- set ns.prev_message_type = 'tool_call' -%}
+        {%- endif -%}
+    {%- else -%}
+        {#- Tool Responses: content may be a mapping, a string, or a sequence of mappings.
+            The function name is read from the content's 'name' key when a 'response' key is also present,
+            otherwise from message['name']. -#}
+        {%- if 'content' in message and message['content'] -%}
+            {%- if message['content'] is mapping -%}
+                {%- if 'name' in message['content'] and 'response' in message['content'] -%}
+                    {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
+                    {%- set response_ns = namespace(found_first=false) -%}
+                    {%- for key, value in message['content']['response'] | dictsort -%}
+                        {%- if response_ns.found_first %},{% endif -%}
+                        {%- set response_ns.found_first = true -%}
+                        {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                    {%- endfor -%}
+                    {{- '}<end_function_response>' -}}
+                {%- elif 'name' in message -%}
+                    {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
+                    {%- set response_ns = namespace(found_first=false) -%}
+                    {%- for key, value in message['content'] | dictsort -%}
+                        {%- if response_ns.found_first %},{% endif -%}
+                        {%- set response_ns.found_first = true -%}
+                        {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                    {%- endfor -%}
+                    {{- '}<end_function_response>' -}}
+                {%- else -%}
+                    {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
+                {%- endif -%}
+            {%- elif message['content'] is string -%}
+                {#- A plain string result is written under the single key `value`. -#}
+                {%- if 'name' in message -%}
+                     {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
+                {%- else -%}
+                     {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {#- One response block is written per item; every item must be a mapping. -#}
+                {%- for item in message['content'] -%}
+                    {%- if item is mapping -%}
+                        {%- if 'name' in item and 'response' in item -%}
+                            {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
+                            {%- set response_ns = namespace(found_first=false) -%}
+                            {%- for key, value in item['response'] | dictsort -%}
+                                {%- if response_ns.found_first %},{% endif -%}
+                                {%- set response_ns.found_first = true -%}
+                                {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                            {%- endfor -%}
+                            {{- '}<end_function_response>' -}}
+                        {%- elif 'name' in message -%}
+                            {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
+                            {%- set response_ns = namespace(found_first=false) -%}
+                            {%- for key, value in item | dictsort -%}
+                                {%- if response_ns.found_first %},{% endif -%}
+                                {%- set response_ns.found_first = true -%}
+                                {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                            {%- endfor -%}
+                            {{- '}<end_function_response>' -}}
+                        {%- else -%}
+                            {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
+                        {%- endif -%}
+                    {%- else -%}
+                        {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
+            {%- endif -%}
+        {%- endif -%}
+        {%- set ns.prev_message_type = 'tool_response' -%}
+    {%- endif -%}
+    {#- The end-of-turn marker is skipped after a tool call or a tool response. -#}
+    {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
+        {{ '<end_of_turn>\n' }}
+    {%- endif -%}
+{%- endfor -%}
+{#- Generation prompt: write the model turn header when requested, except when the last message processed was a tool response. -#}
+{%- if add_generation_prompt and ns.prev_message_type != 'tool_response' -%}
+    {{- '<start_of_turn>model\n' -}}
+{%- endif -%}
--- a/checkpoint-897/chat_template.jinja
+++ b/checkpoint-897/chat_template.jinja
@@ -0,0 +1,279 @@
+{#- format_parameters(properties, required)
+    Renders each entry of the `properties` mapping, in key order (dictsort), as
+    key:{description:<escape>..<escape>[,extra fields],type:<escape>TYPE<escape>}
+    and joins the entries with commas. Extra fields depend on the upper-cased type:
+    `enum` for STRING, nested `properties`/`required` for OBJECT, `items` for ARRAY.
+    NOTE(review): the `required` argument is never read in this macro body; nested
+    `required` lists are taken from each value instead. -#}
+{%- macro format_parameters(properties, required) -%}
+    {#- Entries whose key is one of these names are skipped by the loop below. -#}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {#- found_first records whether an entry was already written, i.e. whether a comma is due. -#}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {%- if key not in standard_keys -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{- key }}:{description:<escape>{{ value['description'] }}<escape>
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    ,enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'OBJECT' -%}
+                ,properties:{
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                {%- elif value is mapping -%}
+                    {#- No mapping under 'properties': the value itself is walked as the property map. -#}
+                    {{- format_parameters(value, value['required'] | default([])) -}}
+                {%- endif -%}
+                }
+                {%- if value['required'] -%}
+                    ,required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <escape>{{- item -}}<escape>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    ,items:{
+                    {#- Each non-null key of `items` is written in key order; properties, required and type get dedicated formatting. -#}
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <escape>{{- req_item -}}<escape>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {#- The type is written last and is followed by the brace that closes this entry. -#}
+            ,type:<escape>{{ value['type'] | upper }}<escape>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{#- format_function_declaration(tool_data)
+    Renders one tool as declaration:NAME followed by a braced body holding the
+    <escape>-wrapped description and, when tool_data['function']['parameters'] is truthy,
+    a parameters section with properties, required and type (in that order).
+    The call site in this template applies `| trim` to the result. -#}
+{% macro format_function_declaration(tool_data) -%}
+declaration:{{- tool_data['function']['name'] -}}
+{description:<escape>{{- tool_data['function']['description'] -}}<escape>
+{%- set params = tool_data['function']['parameters'] -%}
+{%- if params -%}
+    ,parameters:{
+    {%- if params['properties'] -%}
+        properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+    {%- endif -%}
+    {%- if params['required'] -%}
+        required:[
+        {%- for item in params['required'] -%}
+            <escape>{{- item -}}<escape>
+            {{- ',' if not loop.last -}}
+        {%- endfor -%}
+        ],
+    {%- endif -%}
+    {#- NOTE(review): the brace closing the parameters section is emitted only inside this branch, i.e. only when params['type'] is truthy. -#}
+    {%- if params['type'] -%}
+        type:<escape>{{- params['type'] | upper -}}<escape>}
+    {%- endif -%}
+{%- endif -%}
+{#- Closes the declaration body that was opened before the description. -#}
+}
+{%- endmacro -%}
+{#- format_argument(argument, escape_keys=True)
+    Serialises a value for the function-calling syntax used by this template:
+    strings are wrapped in <escape> markers, booleans become true/false, mappings
+    become a braced, comma-separated key:value list ordered by key (dictsort),
+    other sequences become a bracketed, comma-separated list, and anything else is
+    printed as-is. Mapping keys are wrapped in <escape> markers only when
+    escape_keys is true; the flag is passed down to nested values. -#}
+{% macro format_argument(argument, escape_keys=True) -%}
+{%- if argument is string -%}
+    {{- '<escape>' + argument + '<escape>' -}}
+{%- elif argument is boolean -%}
+    {{- 'true' if argument else 'false' -}}
+{%- elif argument is mapping -%}
+    {{- '{' -}}
+    {%- for entry_key, entry_value in argument | dictsort -%}
+        {#- A comma goes in front of every pair except the first one. -#}
+        {%- if not loop.first %},{% endif -%}
+        {{- ('<escape>' + entry_key + '<escape>') if escape_keys else entry_key -}}
+        :{{- format_argument(entry_value, escape_keys=escape_keys) -}}
+    {%- endfor -%}
+    {{- '}' -}}
+{%- elif argument is sequence -%}
+    {{- '[' -}}
+    {%- for element in argument -%}
+        {%- if not loop.first %},{% endif -%}
+        {{- format_argument(element, escape_keys=escape_keys) -}}
+    {%- endfor -%}
+    {{- ']' -}}
+{%- else -%}
+    {{- argument -}}
+{%- endif -%}
+{%- endmacro -%}
+{#- Template output starts here: the BOS token, then an optional developer turn. -#}
+{{ bos_token }}
+{%- set ns = namespace(prev_message_type=None) -%}
+{#- Tool Declarations: a developer turn is written when tools are given or the first message has role system/developer. -#}
+{%- set loop_messages = messages -%}
+{%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
+    {{- '<start_of_turn>developer\n' -}}
+    {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%}
+        {#- String content is written trimmed; for list content only the items of type 'text' are written. -#}
+        {%- if messages[0]['content'] is string -%}
+            {{- messages[0]['content'] | trim -}}
+        {%- elif messages[0]['content'] is sequence -%}
+            {%- for item in messages[0]['content'] -%}
+                {%- if item['type'] == 'text' -%}
+                    {{- item['text'] | trim -}}
+                {%- endif -%}
+            {%- endfor -%}
+        {%- endif -%}
+        {#- The first message has been consumed here, so the message loop starts from the second one. -#}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+    {%- if tools -%}
+        {%- for tool in tools %}
+            {{- '<start_function_declaration>' -}}
+            {{- format_function_declaration(tool) | trim }}
+            {{- '<end_function_declaration>' -}}
+        {%- endfor %}
+    {%- endif -%}
+    {{- '<end_of_turn>\n' }}
+{%- endif %}
+{#- Loop through messages. ns.prev_message_type carries state between iterations;
+    it is set to None, 'content', 'tool_call' or 'tool_response' below. -#}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {#- Rename "assistant" to "model". -#}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {%- if role != 'tool' -%}
+        {#- No turn header is written for a message that directly follows a tool response. -#}
+        {%- if ns.prev_message_type != 'tool_response' -%}
+            {{- '<start_of_turn>' + role + '\n' }}
+        {%- endif -%}
+        {%- set ns.prev_message_type = None -%}
+        {%- if 'content' in message and message['content'] is not none -%}
+            {%- if message['content'] is string -%}
+                {{ message['content'] | trim }}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'image' -%}
+                        {{ '<start_of_image>' }}
+                    {%- elif item['type'] == 'text' -%}
+                        {{ item['text'] | trim }}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{ raise_exception("Invalid content type in user/assistant message") }}
+            {%- endif -%}
+            {%- set ns.prev_message_type = 'content' -%}
+        {%- endif -%}
+        {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%}
+            {#- Tool Calls: each call is written as call:NAME followed by braced, key-sorted arguments. -#}
+            {%- for tool_call in message['tool_calls'] -%}
+                {% set function = tool_call['function'] %}
+                {{-  '<start_function_call>call:' + function['name'] + '{' -}}
+                {%- if 'arguments' in function -%}
+                    {%- if function['arguments'] is mapping -%}
+                        {#- NOTE(review): this rebinds the name `ns` inside the tool_call loop; presumably it relies on
+                            loop-local scoping so the outer ns.prev_message_type is unaffected. Confirm on the target template engine. -#}
+                        {%- set ns = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns.found_first %},{% endif -%}
+                            {%- set ns.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {# This handles string-JSON, just in case #}
+                    {{ function['arguments'] }}
+                    {%- endif %}
+                {%- endif -%}
+                {{- '}<end_function_call>' -}}
+            {%- endfor -%}
+            {#- The tool_call loop has ended, so `loop` is the message loop again: the response opener is only written for the final message. -#}
+            {%- if loop.last -%}
+                {{ '<start_function_response>' }}
+            {%- endif -%}
+            {%- set ns.prev_message_type = 'tool_call' -%}
+        {%- endif -%}
+    {%- else -%}
+        {#- Tool Responses: content may be a mapping, a string, or a sequence of mappings.
+            The function name is read from the content's 'name' key when a 'response' key is also present,
+            otherwise from message['name']. -#}
+        {%- if 'content' in message and message['content'] -%}
+            {%- if message['content'] is mapping -%}
+                {%- if 'name' in message['content'] and 'response' in message['content'] -%}
+                    {{ '<start_function_response>response:' + message['content']['name'] | trim + '{' }}
+                    {%- set response_ns = namespace(found_first=false) -%}
+                    {%- for key, value in message['content']['response'] | dictsort -%}
+                        {%- if response_ns.found_first %},{% endif -%}
+                        {%- set response_ns.found_first = true -%}
+                        {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                    {%- endfor -%}
+                    {{- '}<end_function_response>' -}}
+                {%- elif 'name' in message -%}
+                    {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
+                    {%- set response_ns = namespace(found_first=false) -%}
+                    {%- for key, value in message['content'] | dictsort -%}
+                        {%- if response_ns.found_first %},{% endif -%}
+                        {%- set response_ns.found_first = true -%}
+                        {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                    {%- endfor -%}
+                    {{- '}<end_function_response>' -}}
+                {%- else -%}
+                    {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
+                {%- endif -%}
+            {%- elif message['content'] is string -%}
+                {#- A plain string result is written under the single key `value`. -#}
+                {%- if 'name' in message -%}
+                     {{ '<start_function_response>response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}<end_function_response>' }}
+                {%- else -%}
+                     {{ raise_exception("Invalid tool response: 'name' must be provided.") }}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {#- One response block is written per item; every item must be a mapping. -#}
+                {%- for item in message['content'] -%}
+                    {%- if item is mapping -%}
+                        {%- if 'name' in item and 'response' in item -%}
+                            {{ '<start_function_response>response:' + item['name'] | trim + '{' }}
+                            {%- set response_ns = namespace(found_first=false) -%}
+                            {%- for key, value in item['response'] | dictsort -%}
+                                {%- if response_ns.found_first %},{% endif -%}
+                                {%- set response_ns.found_first = true -%}
+                                {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                            {%- endfor -%}
+                            {{- '}<end_function_response>' -}}
+                        {%- elif 'name' in message -%}
+                            {{ '<start_function_response>response:' + message['name'] | trim + '{' }}
+                            {%- set response_ns = namespace(found_first=false) -%}
+                            {%- for key, value in item | dictsort -%}
+                                {%- if response_ns.found_first %},{% endif -%}
+                                {%- set response_ns.found_first = true -%}
+                                {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                            {%- endfor -%}
+                            {{- '}<end_function_response>' -}}
+                        {%- else -%}
+                            {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }}
+                        {%- endif -%}
+                    {%- else -%}
+                        {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }}
+            {%- endif -%}
+        {%- endif -%}
+        {%- set ns.prev_message_type = 'tool_response' -%}
+    {%- endif -%}
+    {#- The end-of-turn marker is skipped after a tool call or a tool response. -#}
+    {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%}
+        {{ '<end_of_turn>\n' }}
+    {%- endif -%}
+{%- endfor -%}
+{#- Generation prompt: write the model turn header when requested, except when the last message processed was a tool response. -#}
+{%- if add_generation_prompt and ns.prev_message_type != 'tool_response' -%}
+    {{- '<start_of_turn>model\n' -}}
+{%- endif -%}
--- a/checkpoint-897/config.json
+++ b/checkpoint-897/config.json
@@ -0,0 +1,62 @@
+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "Gemma3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "dtype": "bfloat16",
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 18,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "full_attention": {
+      "rope_theta": 1000000.0,
+      "rope_type": "default"
+    },
+    "sliding_attention": {
+      "rope_theta": 10000.0,
+      "rope_type": "default"
+    }
+  },
+  "sliding_window": 512,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.7.0",
+  "use_bidirectional_attention": false,
+  "use_cache": false,
+  "vocab_size": 262144
+}
--- a/checkpoint-897/generation_config.json
+++ b/checkpoint-897/generation_config.json
@@ -0,0 +1,15 @@
+{
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    1,
+    50,
+    106
+  ],
+  "pad_token_id": 0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "5.7.0"
+}
--- a/checkpoint-897/model.safetensors
+++ b/checkpoint-897/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efbb32f4990b6a076d86e5dc1b83f90236f9f459ac4cdb44d1847818f2d99171
+size 536223056
--- a/checkpoint-897/optimizer.pt
+++ b/checkpoint-897/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58b98cc8495901de6250e6e1addaf4bdf6122996dba38605f9fbd5512d915db8
+size 1072593978
--- a/checkpoint-897/rng_state.pth
+++ b/checkpoint-897/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36
+size 14244
--- a/checkpoint-897/scheduler.pt
+++ b/checkpoint-897/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62894fde7f58d2738c7681577552abca250a7b16b270ec0708d33286c8de9398
+size 1064
--- a/checkpoint-897/structured_eval.json
+++ b/checkpoint-897/structured_eval.json
@@ -0,0 +1,423 @@
+{
+  "step": 897,
+  "metrics": {
+    "n_total": 32,
+    "n_tool_call": 22,
+    "n_chat_only": 10,
+    "emit_rate": 1.0,
+    "schema_validity": 0.9090909090909091,
+    "escalation_correct": 1.0,
+    "argument_accuracy": 0.393939393939394,
+    "by_category": {
+      "turn_on_light": {
+        "n": 3,
+        "ok": 3,
+        "pct": 1.0
+      },
+      "turn_off_light": {
+        "n": 3,
+        "ok": 3,
+        "pct": 1.0
+      },
+      "dim_light": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "set_color": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "get_state": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "get_state_sensor": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "search_devices": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "list_in_area": {
+        "n": 2,
+        "ok": 0,
+        "pct": 0.0
+      },
+      "history_sensor": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "complex_reasoning": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "chat_greeting": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "chat_ack": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "chat_out_of_scope": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "chat_ambiguous": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      },
+      "chat_capability": {
+        "n": 2,
+        "ok": 2,
+        "pct": 1.0
+      }
+    }
+  },
+  "results": [
+    {
+      "id": "tc_turn_on_light_001",
+      "category": "turn_on_light",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 0.6666666666666666,
+      "raw": "<start_function_call>call:call_service{domain:<escape>light<escape>,entity_id:<escape>light.living_room_ceiling_light<escape>,service:<escape>turn_on<escape>}<end_function_call>",
+      "latency_s": 1.08
+    },
+    {
+      "id": "tc_turn_on_light_002",
+      "category": "turn_on_light",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 0.6666666666666666,
+      "raw": "<start_function_call>call:call_service{domain:<escape>light<escape>,entity_id:<escape>light.bedroom_ceiling_light<escape>,service:<escape>turn_on<escape>}<end_function_call>",
+      "latency_s": 0.97
+    },
+    {
+      "id": "tc_turn_on_light_003",
+      "category": "turn_on_light",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 0.6666666666666666,
+      "raw": "<start_function_call>call:call_service{domain:<escape>light<escape>,entity_id:<escape>light.desk_desk_downlight<escape>,service:<escape>turn_on<escape>}<end_function_call>",
+      "latency_s": 1.0
+    },
+    {
+      "id": "tc_turn_off_light_001",
+      "category": "turn_off_light",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 1.0,
+      "raw": "<start_function_call>call:call_service{domain:<escape>light<escape>,entity_id:<escape>light.kitchen_pendant<escape>,service:<escape>turn_off<escape>}<end_function_call>",
+      "latency_s": 0.94
+    },
+    {
+      "id": "tc_turn_off_light_002",
+      "category": "turn_off_light",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 1.0,
+      "raw": "<start_function_call>call:call_service{domain:<escape>light<escape>,entity_id:<escape>light.bedroom_ceiling_light<escape>,service:<escape>turn_off<escape>}<end_function_call>",
+      "latency_s": 0.97
+    },
+    {
+      "id": "tc_turn_off_light_003",
+      "category": "turn_off_light",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 0.6666666666666666,
+      "raw": "<start_function_call>call:call_service{domain:<escape>light<escape>,entity_id:<escape>light.kids_night_light<escape>,service:<escape>turn_off<escape>}<end_function_call>",
+      "latency_s": 0.97
+    },
+    {
+      "id": "tc_dim_light_001",
+      "category": "dim_light",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:call_service{data:{brightness_pct:30},domain:<escape>light<escape>,entity_id:<escape>light.bedroom_lamp<escape>,service:<escape>turn_on<escape>}<end_function_call>",
+      "latency_s": 1.15
+    },
+    {
+      "id": "tc_dim_light_002",
+      "category": "dim_light",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:call_service{data:{brightness_pct:70},domain:<escape>light<escape>,entity_id:<escape>light.kitchen_island<escape>,service:<escape>turn_on<escape>}<end_function_call>",
+      "latency_s": 1.15
+    },
+    {
+      "id": "tc_set_color_001",
+      "category": "set_color",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:call_service{data:{rgb_color:[255,0,0]},domain:<escape>light<escape>,entity_id:<escape>light.living_room_main_light<escape>,service:<escape>turn_on<escape>}<end_function_call>",
+      "latency_s": 1.39
+    },
+    {
+      "id": "tc_set_color_002",
+      "category": "set_color",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "call_service",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:call_service{data:{rgb_color:[255,0,0]},domain:<escape>light<escape>,entity_id:<escape>light.bedroom_night_light<escape>,service:<escape>turn_on<escape>}<end_function_call>",
+      "latency_s": 1.34
+    },
+    {
+      "id": "tc_get_state_001",
+      "category": "get_state",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "get_state",
+      "schema_valid": true,
+      "argument_accuracy": 1.0,
+      "raw": "<start_function_call>call:get_state{entity_id:<escape>switch.bedroom_fan<escape>}<end_function_call>",
+      "latency_s": 0.54
+    },
+    {
+      "id": "tc_get_state_002",
+      "category": "get_state",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "get_state",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:get_state{entity_id:<escape>climate.master_bedroom_mini_split<escape>}<end_function_call>",
+      "latency_s": 0.65
+    },
+    {
+      "id": "tc_get_state_sensor_001",
+      "category": "get_state_sensor",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "get_state",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:get_state{entity_id:<escape>sensor.living_room_temperature_sensor<escape>}<end_function_call>",
+      "latency_s": 0.65
+    },
+    {
+      "id": "tc_get_state_sensor_002",
+      "category": "get_state_sensor",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "get_state",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:get_state{entity_id:<escape>sensor.kitchen_humidity_sensor<escape>}<end_function_call>",
+      "latency_s": 0.6
+    },
+    {
+      "id": "tc_search_001",
+      "category": "search_devices",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "search_entities",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:search_entities{query:<escape>sensors<escape>}<end_function_call>",
+      "latency_s": 0.38
+    },
+    {
+      "id": "tc_search_002",
+      "category": "search_devices",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "search_entities",
+      "schema_valid": true,
+      "argument_accuracy": 1.0,
+      "raw": "<start_function_call>call:search_entities{query:<escape>blinds<escape>}<end_function_call>",
+      "latency_s": 0.41
+    },
+    {
+      "id": "tc_list_001",
+      "category": "list_in_area",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "search_entities",
+      "schema_valid": false,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:search_entities{query:<escape>lights<escape>}<end_function_call>",
+      "latency_s": 0.38
+    },
+    {
+      "id": "tc_list_002",
+      "category": "list_in_area",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "search_entities",
+      "schema_valid": false,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:search_entities{query:<escape>climate<escape>}<end_function_call>",
+      "latency_s": 0.38
+    },
+    {
+      "id": "tc_history_001",
+      "category": "history_sensor",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "get_history",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:get_history{end_time:<escape>2026-05-02T00:00:00Z<escape>,entity_id:<escape>sensor.living_room_temperature_sensor<escape>,start_time:<escape>2026-05-01T00:00:00Z<escape>}<end_function_call>",
+      "latency_s": 2.06
+    },
+    {
+      "id": "tc_history_002",
+      "category": "history_sensor",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "get_history",
+      "schema_valid": true,
+      "argument_accuracy": 0.0,
+      "raw": "<start_function_call>call:get_history{end_time:<escape>2026-05-02T00:00:00Z<escape>,entity_id:<escape>sensor.living_room_co2_sensor<escape>,start_time:<escape>2026-05-01T00:00:00Z<escape>}<end_function_call>",
+      "latency_s": 2.12
+    },
+    {
+      "id": "tc_complex_001",
+      "category": "complex_reasoning",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "escalate_to_sonnet",
+      "schema_valid": true,
+      "argument_accuracy": 1.0,
+      "raw": "<start_function_call>call:escalate_to_sonnet{reason:<escape>complex_query<escape>}<end_function_call>",
+      "latency_s": 0.55
+    },
+    {
+      "id": "tc_complex_002",
+      "category": "complex_reasoning",
+      "type": "tool_call",
+      "emitted_call": true,
+      "called_name": "escalate_to_sonnet",
+      "schema_valid": true,
+      "argument_accuracy": 1.0,
+      "raw": "<start_function_call>call:escalate_to_sonnet{reason:<escape>complex_query<escape>}<end_function_call>",
+      "latency_s": 0.54
+    },
+    {
+      "id": "co_greet_001",
+      "category": "chat_greeting",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "Hi! What can I do for you?<end_of_turn>",
+      "latency_s": 0.28
+    },
+    {
+      "id": "co_greet_002",
+      "category": "chat_greeting",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "Hello! How can I help with your smart home today?<end_of_turn>",
+      "latency_s": 0.36
+    },
+    {
+      "id": "co_ack_001",
+      "category": "chat_ack",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "Sure thing.<end_of_turn>",
+      "latency_s": 0.12
+    },
+    {
+      "id": "co_ack_002",
+      "category": "chat_ack",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "Anytime.<end_of_turn>",
+      "latency_s": 0.11
+    },
+    {
+      "id": "co_oos_001",
+      "category": "chat_out_of_scope",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "I can't help with that, but I can turn on lights, set scenes, or check what's going on around the house.<end_of_turn>",
+      "latency_s": 0.8
+    },
+    {
+      "id": "co_oos_002",
+      "category": "chat_out_of_scope",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "I'm focused on your smart home \u2014 for that one you'll want a different assistant.<end_of_turn>",
+      "latency_s": 0.56
+    },
+    {
+      "id": "co_amb_001",
+      "category": "chat_ambiguous",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "Sure \u2014 could you tell me a bit more about what you'd like? For example, I can turn on lights, set a thermostat, or check a sensor.<end_of_turn>",
+      "latency_s": 0.93
+    },
+    {
+      "id": "co_amb_002",
+      "category": "chat_ambiguous",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "I can do plenty around the house \u2014 could you point me at a specific room or device?<end_of_turn>",
+      "latency_s": 0.54
+    },
+    {
+      "id": "co_cap_001",
+      "category": "chat_capability",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "I control your smart home \u2014 lights, switches, climate, covers, media players \u2014 and I can read sensors and look up history. Just tell me what you want.<end_of_turn>",
+      "latency_s": 0.95
+    },
+    {
+      "id": "co_cap_002",
+      "category": "chat_capability",
+      "type": "chat_only",
+      "emitted_call": false,
+      "escalation_correct": true,
+      "raw": "I'm a smart home assistant connected to Home Assistant. I can turn devices on and off, dim lights, set thermostats, open and close covers, and read sensor data. Try asking me to turn on a light.<end_of_turn>",
+      "latency_s": 1.23
+    }
+  ]
+}
--- a/checkpoint-897/tokenizer.json
+++ b/checkpoint-897/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80d7f800b949accd7eb940bac75e642f9468e4df157403032a55bf54ed23b650
+size 33384898
--- a/checkpoint-897/tokenizer_config.json
+++ b/checkpoint-897/tokenizer_config.json
@@ -0,0 +1,27 @@
+{
+  "backend": "tokenizers",
+  "boi_token": "<start_of_image>",
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eoi_token": "<end_of_image>",
+  "eos_token": "<eos>",
+  "image_token": "<image_soft_token>",
+  "is_local": false,
+  "local_files_only": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "boi_token": "<start_of_image>",
+    "eoi_token": "<end_of_image>",
+    "image_token": "<image_soft_token>",
+    "sfr_token": "<start_function_response>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "sfr_token": "<start_function_response>",
+  "sp_model_kwargs": null,
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
--- a/checkpoint-897/trainer_state.json
+++ b/checkpoint-897/trainer_state.json
@@ -0,0 +1,935 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 897,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "entropy": 0.8498557328246534,
+      "epoch": 0.011154489682097044,
+      "grad_norm": 44.5,
+      "learning_rate": 1e-05,
+      "loss": 3.704855728149414,
+      "mean_token_accuracy": 0.5304131177254021,
+      "num_tokens": 833799.0,
+      "step": 10
+    },
+    {
+      "entropy": 1.765550174564123,
+      "epoch": 0.022308979364194088,
+      "grad_norm": 10.25,
+      "learning_rate": 2.111111111111111e-05,
+      "loss": 1.9117172241210938,
+      "mean_token_accuracy": 0.6469556039199233,
+      "num_tokens": 1671910.0,
+      "step": 20
+    },
+    {
+      "entropy": 0.8713466321351007,
+      "epoch": 0.03346346904629113,
+      "grad_norm": 3.78125,
+      "learning_rate": 3.222222222222223e-05,
+      "loss": 0.8604758262634278,
+      "mean_token_accuracy": 0.8343592453747988,
+      "num_tokens": 2510443.0,
+      "step": 30
+    },
+    {
+      "entropy": 0.23340068878605963,
+      "epoch": 0.044617958728388175,
+      "grad_norm": 1.953125,
+      "learning_rate": 4.3333333333333334e-05,
+      "loss": 0.24464244842529298,
+      "mean_token_accuracy": 0.9471705831587315,
+      "num_tokens": 3345229.0,
+      "step": 40
+    },
+    {
+      "entropy": 0.16152286342112349,
+      "epoch": 0.05577244841048522,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9997280790439974e-05,
+      "loss": 0.17044107913970946,
+      "mean_token_accuracy": 0.9553312979638576,
+      "num_tokens": 4184871.0,
+      "step": 50
+    },
+    {
+      "entropy": 0.1448873324552551,
+      "epoch": 0.06692693809258227,
+      "grad_norm": 0.984375,
+      "learning_rate": 4.996669647581318e-05,
+      "loss": 0.14741255044937135,
+      "mean_token_accuracy": 0.9585917994379998,
+      "num_tokens": 5039068.0,
+      "step": 60
+    },
+    {
+      "entropy": 0.12383970170631073,
+      "epoch": 0.0780814277746793,
+      "grad_norm": 1.125,
+      "learning_rate": 4.990217055187362e-05,
+      "loss": 0.12599575519561768,
+      "mean_token_accuracy": 0.9621855434030294,
+      "num_tokens": 5904036.0,
+      "step": 70
+    },
+    {
+      "entropy": 0.11301726293168031,
+      "epoch": 0.08923591745677635,
+      "grad_norm": 1.3828125,
+      "learning_rate": 4.980379074002661e-05,
+      "loss": 0.11982399225234985,
+      "mean_token_accuracy": 0.964000066742301,
+      "num_tokens": 6747251.0,
+      "step": 80
+    },
+    {
+      "entropy": 0.10463761446881108,
+      "epoch": 0.1003904071388734,
+      "grad_norm": 0.71484375,
+      "learning_rate": 4.967169078520476e-05,
+      "loss": 0.11220132112503052,
+      "mean_token_accuracy": 0.9658373668789864,
+      "num_tokens": 7572769.0,
+      "step": 90
+    },
+    {
+      "entropy": 0.10532618285797071,
+      "epoch": 0.11154489682097044,
+      "grad_norm": 0.9296875,
+      "learning_rate": 4.9506050274045076e-05,
+      "loss": 0.11419826745986938,
+      "mean_token_accuracy": 0.965171106159687,
+      "num_tokens": 8410764.0,
+      "step": 100
+    },
+    {
+      "entropy": 0.10305561173590831,
+      "epoch": 0.12269938650306748,
+      "grad_norm": 0.89453125,
+      "learning_rate": 4.930709439074528e-05,
+      "loss": 0.10990087985992432,
+      "mean_token_accuracy": 0.9659170717000961,
+      "num_tokens": 9255104.0,
+      "step": 110
+    },
+    {
+      "entropy": 0.10211670671415049,
+      "epoch": 0.13385387618516453,
+      "grad_norm": 0.76953125,
+      "learning_rate": 4.90750936109315e-05,
+      "loss": 0.11037271022796631,
+      "mean_token_accuracy": 0.966075143776834,
+      "num_tokens": 10103000.0,
+      "step": 120
+    },
+    {
+      "entropy": 0.09717915701621678,
+      "epoch": 0.14500836586726157,
+      "grad_norm": 0.73046875,
+      "learning_rate": 4.881036333395329e-05,
+      "loss": 0.10295262336730956,
+      "mean_token_accuracy": 0.9677550513297319,
+      "num_tokens": 10945768.0,
+      "step": 130
+    },
+    {
+      "entropy": 0.09635820150724612,
+      "epoch": 0.1561628555493586,
+      "grad_norm": 0.7265625,
+      "learning_rate": 4.851326345410594e-05,
+      "loss": 0.10199121236801148,
+      "mean_token_accuracy": 0.9681327627971769,
+      "num_tokens": 11801129.0,
+      "step": 140
+    },
+    {
+      "entropy": 0.09002169943414629,
+      "epoch": 0.16731734523145567,
+      "grad_norm": 0.98046875,
+      "learning_rate": 4.818419787136311e-05,
+      "loss": 0.09567687511444092,
+      "mean_token_accuracy": 0.9701874911785126,
+      "num_tokens": 12636424.0,
+      "step": 150
+    },
+    {
+      "entropy": 0.09068954657413997,
+      "epoch": 0.1784718349135527,
+      "grad_norm": 1.1015625,
+      "learning_rate": 4.782361394228472e-05,
+      "loss": 0.0969263732433319,
+      "mean_token_accuracy": 0.9701515214517713,
+      "num_tokens": 13490177.0,
+      "step": 160
+    },
+    {
+      "entropy": 0.08610689677589108,
+      "epoch": 0.18962632459564974,
+      "grad_norm": 0.96484375,
+      "learning_rate": 4.74320018718467e-05,
+      "loss": 0.08986451625823974,
+      "mean_token_accuracy": 0.9720516135916114,
+      "num_tokens": 14328572.0,
+      "step": 170
+    },
+    {
+      "entropy": 0.08233372264367063,
+      "epoch": 0.2007808142777468,
+      "grad_norm": 1.890625,
+      "learning_rate": 4.700989404701941e-05,
+      "loss": 0.08586806058883667,
+      "mean_token_accuracy": 0.9734413396567106,
+      "num_tokens": 15189703.0,
+      "step": 180
+    },
+    {
+      "entropy": 0.07736555864394176,
+      "epoch": 0.21193530395984383,
+      "grad_norm": 1.28125,
+      "learning_rate": 4.6557864313000695e-05,
+      "loss": 0.07913717031478881,
+      "mean_token_accuracy": 0.9759283743798732,
+      "num_tokens": 16028746.0,
+      "step": 190
+    },
+    {
+      "entropy": 0.07013691037718672,
+      "epoch": 0.22308979364194087,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.60765271930874e-05,
+      "loss": 0.07151558399200439,
+      "mean_token_accuracy": 0.978206392377615,
+      "num_tokens": 16849567.0,
+      "step": 200
+    },
+    {
+      "entropy": 0.07075640709081199,
+      "epoch": 0.23424428332403793,
+      "grad_norm": 1.4921875,
+      "learning_rate": 4.55665370532461e-05,
+      "loss": 0.07073653936386108,
+      "mean_token_accuracy": 0.9784815656021237,
+      "num_tokens": 17705656.0,
+      "step": 210
+    },
+    {
+      "entropy": 0.06637019099725876,
+      "epoch": 0.24539877300613497,
+      "grad_norm": 1.28125,
+      "learning_rate": 4.5028587212518705e-05,
+      "loss": 0.06597371697425843,
+      "mean_token_accuracy": 0.979678837954998,
+      "num_tokens": 18565065.0,
+      "step": 220
+    },
+    {
+      "entropy": 0.05923618896049447,
+      "epoch": 0.25655326268823203,
+      "grad_norm": 1.2734375,
+      "learning_rate": 4.4463409000472234e-05,
+      "loss": 0.058509671688079835,
+      "mean_token_accuracy": 0.9822878390550613,
+      "num_tokens": 19392725.0,
+      "step": 230
+    },
+    {
+      "entropy": 0.05874249367916491,
+      "epoch": 0.26770775237032907,
+      "grad_norm": 1.171875,
+      "learning_rate": 4.3871770762974306e-05,
+      "loss": 0.05813463926315308,
+      "mean_token_accuracy": 0.9824939148500562,
+      "num_tokens": 20223583.0,
+      "step": 240
+    },
+    {
+      "entropy": 0.05689131756371353,
+      "epoch": 0.2788622420524261,
+      "grad_norm": 1.4609375,
+      "learning_rate": 4.325447681764586e-05,
+      "loss": 0.055121219158172606,
+      "mean_token_accuracy": 0.9830958772450685,
+      "num_tokens": 21085157.0,
+      "step": 250
+    },
+    {
+      "entropy": 0.05158787120017223,
+      "epoch": 0.29001673173452314,
+      "grad_norm": 1.0390625,
+      "learning_rate": 4.261236636041108e-05,
+      "loss": 0.05031982660293579,
+      "mean_token_accuracy": 0.984761236794293,
+      "num_tokens": 21904752.0,
+      "step": 260
+    },
+    {
+      "entropy": 0.05179886775440536,
+      "epoch": 0.30117122141662017,
+      "grad_norm": 0.78515625,
+      "learning_rate": 4.194631232463128e-05,
+      "loss": 0.0493065744638443,
+      "mean_token_accuracy": 0.9848343567922712,
+      "num_tokens": 22765981.0,
+      "step": 270
+    },
+    {
+      "entropy": 0.049823664524592456,
+      "epoch": 0.3123257110987172,
+      "grad_norm": 0.62890625,
+      "learning_rate": 4.1257220194373424e-05,
+      "loss": 0.04740493595600128,
+      "mean_token_accuracy": 0.9855156386271119,
+      "num_tokens": 23621926.0,
+      "step": 280
+    },
+    {
+      "entropy": 0.04758051415265072,
+      "epoch": 0.3234802007808143,
+      "grad_norm": 0.5,
+      "learning_rate": 4.054602677342684e-05,
+      "loss": 0.04637431204319,
+      "mean_token_accuracy": 0.9858794504776597,
+      "num_tokens": 24465607.0,
+      "step": 290
+    },
+    {
+      "entropy": 0.04668422270915471,
+      "epoch": 0.33463469046291133,
+      "grad_norm": 0.474609375,
+      "learning_rate": 3.981369891174155e-05,
+      "loss": 0.04507455825805664,
+      "mean_token_accuracy": 0.9860366908833385,
+      "num_tokens": 25299933.0,
+      "step": 300
+    },
+    {
+      "entropy": 0.04640703263867181,
+      "epoch": 0.34578918014500837,
+      "grad_norm": 0.390625,
+      "learning_rate": 3.906123219101952e-05,
+      "loss": 0.04516075849533081,
+      "mean_token_accuracy": 0.9859529983252286,
+      "num_tokens": 26154681.0,
+      "step": 310
+    },
+    {
+      "entropy": 0.04438853256579023,
+      "epoch": 0.3569436698271054,
+      "grad_norm": 0.423828125,
+      "learning_rate": 3.8289649571245885e-05,
+      "loss": 0.044096818566322325,
+      "mean_token_accuracy": 0.986466808244586,
+      "num_tokens": 26983541.0,
+      "step": 320
+    },
+    {
+      "entropy": 0.044660908347577785,
+      "epoch": 0.36809815950920244,
+      "grad_norm": 0.376953125,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.043841251730918886,
+      "mean_token_accuracy": 0.986228640563786,
+      "num_tokens": 27833414.0,
+      "step": 330
+    },
+    {
+      "entropy": 0.043841811595484614,
+      "epoch": 0.3792526491912995,
+      "grad_norm": 0.328125,
+      "learning_rate": 3.669335698643704e-05,
+      "loss": 0.04326000213623047,
+      "mean_token_accuracy": 0.9865183688700199,
+      "num_tokens": 28677577.0,
+      "step": 340
+    },
+    {
+      "entropy": 0.04371235728031024,
+      "epoch": 0.39040713887339656,
+      "grad_norm": 0.390625,
+      "learning_rate": 3.587081714187874e-05,
+      "loss": 0.043233224749565126,
+      "mean_token_accuracy": 0.9865481941029429,
+      "num_tokens": 29512895.0,
+      "step": 350
+    },
+    {
+      "entropy": 0.04327065504912753,
+      "epoch": 0.4015616285554936,
+      "grad_norm": 0.330078125,
+      "learning_rate": 3.503349868899722e-05,
+      "loss": 0.04288822710514069,
+      "mean_token_accuracy": 0.986665309779346,
+      "num_tokens": 30343018.0,
+      "step": 360
+    },
+    {
+      "entropy": 0.04256358170532622,
+      "epoch": 0.41271611823759063,
+      "grad_norm": 0.376953125,
+      "learning_rate": 3.418253994161892e-05,
+      "loss": 0.042832252383232114,
+      "mean_token_accuracy": 0.9867880517616868,
+      "num_tokens": 31158940.0,
+      "step": 370
+    },
+    {
+      "entropy": 0.04262932341953274,
+      "epoch": 0.42387060791968767,
+      "grad_norm": 0.30078125,
+      "learning_rate": 3.3319097757214843e-05,
+      "loss": 0.04222110211849213,
+      "mean_token_accuracy": 0.9866109801456332,
+      "num_tokens": 32008434.0,
+      "step": 380
+    },
+    {
+      "entropy": 0.04266308699734509,
+      "epoch": 0.4350250976017847,
+      "grad_norm": 0.314453125,
+      "learning_rate": 3.244434596418139e-05,
+      "loss": 0.042472487688064574,
+      "mean_token_accuracy": 0.9866344403475523,
+      "num_tokens": 32854651.0,
+      "step": 390
+    },
+    {
+      "entropy": 0.0429983379173791,
+      "epoch": 0.44617958728388174,
+      "grad_norm": 0.357421875,
+      "learning_rate": 3.155947376604948e-05,
+      "loss": 0.04324407577514648,
+      "mean_token_accuracy": 0.9865379808470607,
+      "num_tokens": 33705423.0,
+      "step": 400
+    },
+    {
+      "entropy": 0.04282604140753392,
+      "epoch": 0.45733407696597883,
+      "grad_norm": 0.294921875,
+      "learning_rate": 3.066568412479167e-05,
+      "loss": 0.04259026348590851,
+      "mean_token_accuracy": 0.9867103593423963,
+      "num_tokens": 34538632.0,
+      "step": 410
+    },
+    {
+      "entropy": 0.04212904951127712,
+      "epoch": 0.46848856664807587,
+      "grad_norm": 0.4140625,
+      "learning_rate": 2.976419212542495e-05,
+      "loss": 0.04252048432826996,
+      "mean_token_accuracy": 0.9867507757619023,
+      "num_tokens": 35381587.0,
+      "step": 420
+    },
+    {
+      "entropy": 0.0412595656584017,
+      "epoch": 0.4796430563301729,
+      "grad_norm": 0.3125,
+      "learning_rate": 2.885622332413256e-05,
+      "loss": 0.041145503520965576,
+      "mean_token_accuracy": 0.9870552903041243,
+      "num_tokens": 36234112.0,
+      "step": 430
+    },
+    {
+      "entropy": 0.041778112881002014,
+      "epoch": 0.49079754601226994,
+      "grad_norm": 0.349609375,
+      "learning_rate": 2.7943012082150533e-05,
+      "loss": 0.041335687041282654,
+      "mean_token_accuracy": 0.9867611099034548,
+      "num_tokens": 37077497.0,
+      "step": 440
+    },
+    {
+      "entropy": 0.04076959296362474,
+      "epoch": 0.501952035694367,
+      "grad_norm": 0.318359375,
+      "learning_rate": 2.7025799887684002e-05,
+      "loss": 0.041106203198432924,
+      "mean_token_accuracy": 0.9871867259964346,
+      "num_tokens": 37919261.0,
+      "step": 450
+    },
+    {
+      "entropy": 0.04188558856840245,
+      "epoch": 0.5131065253764641,
+      "grad_norm": 0.314453125,
+      "learning_rate": 2.6105833668134473e-05,
+      "loss": 0.041896390914916995,
+      "mean_token_accuracy": 0.9867892485111952,
+      "num_tokens": 38782505.0,
+      "step": 460
+    },
+    {
+      "entropy": 0.04178600409068167,
+      "epoch": 0.524261015058561,
+      "grad_norm": 0.3359375,
+      "learning_rate": 2.518436409493281e-05,
+      "loss": 0.04188077747821808,
+      "mean_token_accuracy": 0.9868596900254488,
+      "num_tokens": 39610893.0,
+      "step": 470
+    },
+    {
+      "entropy": 0.04152031776320655,
+      "epoch": 0.5354155047406581,
+      "grad_norm": 0.267578125,
+      "learning_rate": 2.426264388328214e-05,
+      "loss": 0.04174352586269379,
+      "mean_token_accuracy": 0.9868257040157914,
+      "num_tokens": 40427636.0,
+      "step": 480
+    },
+    {
+      "entropy": 0.040754605704569256,
+      "epoch": 0.5465699944227551,
+      "grad_norm": 0.3125,
+      "learning_rate": 2.334192608912241e-05,
+      "loss": 0.04108997285366058,
+      "mean_token_accuracy": 0.9870152780786157,
+      "num_tokens": 41252001.0,
+      "step": 490
+    },
+    {
+      "entropy": 0.042179943548399025,
+      "epoch": 0.5577244841048522,
+      "grad_norm": 0.353515625,
+      "learning_rate": 2.2423462405631616e-05,
+      "loss": 0.04207477867603302,
+      "mean_token_accuracy": 0.9866394894197583,
+      "num_tokens": 42113694.0,
+      "step": 500
+    },
+    {
+      "entropy": 0.04122589101898484,
+      "epoch": 0.5688789737869493,
+      "grad_norm": 0.330078125,
+      "learning_rate": 2.150850146157985e-05,
+      "loss": 0.04146281182765961,
+      "mean_token_accuracy": 0.9869526766240597,
+      "num_tokens": 42941138.0,
+      "step": 510
+    },
+    {
+      "entropy": 0.04114197726012207,
+      "epoch": 0.5800334634690463,
+      "grad_norm": 0.345703125,
+      "learning_rate": 2.0598287123849095e-05,
+      "loss": 0.040973353385925296,
+      "mean_token_accuracy": 0.9871221456676722,
+      "num_tokens": 43770822.0,
+      "step": 520
+    },
+    {
+      "entropy": 0.04192428553069476,
+      "epoch": 0.5911879531511434,
+      "grad_norm": 0.302734375,
+      "learning_rate": 1.9694056806426928e-05,
+      "loss": 0.04169855713844299,
+      "mean_token_accuracy": 0.9866715084761382,
+      "num_tokens": 44637866.0,
+      "step": 530
+    },
+    {
+      "entropy": 0.03977415001136251,
+      "epoch": 0.6023424428332403,
+      "grad_norm": 0.314453125,
+      "learning_rate": 1.879703978817256e-05,
+      "loss": 0.04036850333213806,
+      "mean_token_accuracy": 0.9872556058689952,
+      "num_tokens": 45453923.0,
+      "step": 540
+    },
+    {
+      "entropy": 0.04234540155448485,
+      "epoch": 0.6134969325153374,
+      "grad_norm": 0.33203125,
+      "learning_rate": 1.7908455541642584e-05,
+      "loss": 0.04180983603000641,
+      "mean_token_accuracy": 0.9865613304078579,
+      "num_tokens": 46306551.0,
+      "step": 550
+    },
+    {
+      "entropy": 0.041263596300268546,
+      "epoch": 0.6246514221974344,
+      "grad_norm": 0.31640625,
+      "learning_rate": 1.7029512075247967e-05,
+      "loss": 0.04135525822639465,
+      "mean_token_accuracy": 0.986899808421731,
+      "num_tokens": 47143518.0,
+      "step": 560
+    },
+    {
+      "entropy": 0.04119314953277353,
+      "epoch": 0.6358059118795315,
+      "grad_norm": 0.3203125,
+      "learning_rate": 1.6161404290996412e-05,
+      "loss": 0.04146760106086731,
+      "mean_token_accuracy": 0.9868535120040178,
+      "num_tokens": 47992113.0,
+      "step": 570
+    },
+    {
+      "entropy": 0.04120078657870181,
+      "epoch": 0.6469604015616286,
+      "grad_norm": 0.30859375,
+      "learning_rate": 1.5305312360052442e-05,
+      "loss": 0.0413068950176239,
+      "mean_token_accuracy": 0.986842698045075,
+      "num_tokens": 48843712.0,
+      "step": 580
+    },
+    {
+      "entropy": 0.04128208919428289,
+      "epoch": 0.6581148912437256,
+      "grad_norm": 0.326171875,
+      "learning_rate": 1.4462400118323798e-05,
+      "loss": 0.041500210762023926,
+      "mean_token_accuracy": 0.9869369497522712,
+      "num_tokens": 49688129.0,
+      "step": 590
+    },
+    {
+      "entropy": 0.04088454471202567,
+      "epoch": 0.6692693809258227,
+      "grad_norm": 0.33203125,
+      "learning_rate": 1.3633813484255131e-05,
+      "loss": 0.041133826971054076,
+      "mean_token_accuracy": 0.9869873868301511,
+      "num_tokens": 50520741.0,
+      "step": 600
+    },
+    {
+      "entropy": 0.04227327090920881,
+      "epoch": 0.6804238706079196,
+      "grad_norm": 0.275390625,
+      "learning_rate": 1.2820678900980093e-05,
+      "loss": 0.04190162420272827,
+      "mean_token_accuracy": 0.9865590412169695,
+      "num_tokens": 51392294.0,
+      "step": 610
+    },
+    {
+      "entropy": 0.04065559499140363,
+      "epoch": 0.6915783602900167,
+      "grad_norm": 0.328125,
+      "learning_rate": 1.2024101804949638e-05,
+      "loss": 0.04115171730518341,
+      "mean_token_accuracy": 0.9869800698012113,
+      "num_tokens": 52240430.0,
+      "step": 620
+    },
+    {
+      "entropy": 0.042412614575005135,
+      "epoch": 0.7027328499721138,
+      "grad_norm": 0.3203125,
+      "learning_rate": 1.124516512311836e-05,
+      "loss": 0.04237264692783356,
+      "mean_token_accuracy": 0.9865791719406843,
+      "num_tokens": 53087953.0,
+      "step": 630
+    },
+    {
+      "entropy": 0.04051031620183494,
+      "epoch": 0.7138873396542108,
+      "grad_norm": 0.28515625,
+      "learning_rate": 1.0484927800731984e-05,
+      "loss": 0.040881377458572385,
+      "mean_token_accuracy": 0.9869993204250932,
+      "num_tokens": 53927989.0,
+      "step": 640
+    },
+    {
+      "entropy": 0.04193990352796391,
+      "epoch": 0.7250418293363079,
+      "grad_norm": 0.298828125,
+      "learning_rate": 9.744423361717323e-06,
+      "loss": 0.04187402129173279,
+      "mean_token_accuracy": 0.9866631610319019,
+      "num_tokens": 54774541.0,
+      "step": 650
+    },
+    {
+      "entropy": 0.03943951329856645,
+      "epoch": 0.7361963190184049,
+      "grad_norm": 0.3125,
+      "learning_rate": 9.024658503631967e-06,
+      "loss": 0.04017325043678284,
+      "mean_token_accuracy": 0.9874097904190421,
+      "num_tokens": 55607613.0,
+      "step": 660
+    },
+    {
+      "entropy": 0.041415746443090026,
+      "epoch": 0.747350808700502,
+      "grad_norm": 0.296875,
+      "learning_rate": 8.32661172908373e-06,
+      "loss": 0.04164916574954987,
+      "mean_token_accuracy": 0.9868578946217894,
+      "num_tokens": 56444089.0,
+      "step": 670
+    },
+    {
+      "entropy": 0.04108071085065603,
+      "epoch": 0.758505298382599,
+      "grad_norm": 0.265625,
+      "learning_rate": 7.651232015480462e-06,
+      "loss": 0.04107390642166138,
+      "mean_token_accuracy": 0.986830660328269,
+      "num_tokens": 57290368.0,
+      "step": 680
+    },
+    {
+      "entropy": 0.04229205273441039,
+      "epoch": 0.769659788064696,
+      "grad_norm": 0.306640625,
+      "learning_rate": 6.99943752491857e-06,
+      "loss": 0.04177336990833282,
+      "mean_token_accuracy": 0.9866125296801329,
+      "num_tokens": 58128968.0,
+      "step": 690
+    },
+    {
+      "entropy": 0.041548075363971294,
+      "epoch": 0.7808142777467931,
+      "grad_norm": 0.279296875,
+      "learning_rate": 6.372114355964293e-06,
+      "loss": 0.04167112410068512,
+      "mean_token_accuracy": 0.9867573702707887,
+      "num_tokens": 58984460.0,
+      "step": 700
+    },
+    {
+      "entropy": 0.0403200296277646,
+      "epoch": 0.7919687674288901,
+      "grad_norm": 0.306640625,
+      "learning_rate": 5.770115339024484e-06,
+      "loss": 0.04050106704235077,
+      "mean_token_accuracy": 0.9871903322637081,
+      "num_tokens": 59827367.0,
+      "step": 710
+    },
+    {
+      "entropy": 0.0406710500101326,
+      "epoch": 0.8031232571109872,
+      "grad_norm": 0.294921875,
+      "learning_rate": 5.194258876944705e-06,
+      "loss": 0.04084862470626831,
+      "mean_token_accuracy": 0.9871157312765717,
+      "num_tokens": 60654050.0,
+      "step": 720
+    },
+    {
+      "entropy": 0.040411298532853836,
+      "epoch": 0.8142777467930842,
+      "grad_norm": 0.28515625,
+      "learning_rate": 4.645327832410648e-06,
+      "loss": 0.040474030375480655,
+      "mean_token_accuracy": 0.9871165057644248,
+      "num_tokens": 61488530.0,
+      "step": 730
+    },
+    {
+      "entropy": 0.04196117307874374,
+      "epoch": 0.8254322364751813,
+      "grad_norm": 0.310546875,
+      "learning_rate": 4.12406846366562e-06,
+      "loss": 0.04189785122871399,
+      "mean_token_accuracy": 0.9867719961330295,
+      "num_tokens": 62326744.0,
+      "step": 740
+    },
+    {
+      "entropy": 0.040386362894787455,
+      "epoch": 0.8365867261572784,
+      "grad_norm": 0.2734375,
+      "learning_rate": 3.631189409990815e-06,
+      "loss": 0.04039705097675324,
+      "mean_token_accuracy": 0.9871017251163721,
+      "num_tokens": 63170753.0,
+      "step": 750
+    },
+    {
+      "entropy": 0.040684799235896206,
+      "epoch": 0.8477412158393753,
+      "grad_norm": 0.265625,
+      "learning_rate": 3.1673607283276813e-06,
+      "loss": 0.04109015464782715,
+      "mean_token_accuracy": 0.9869557719677686,
+      "num_tokens": 64013316.0,
+      "step": 760
+    },
+    {
+      "entropy": 0.042057951152673925,
+      "epoch": 0.8588957055214724,
+      "grad_norm": 0.275390625,
+      "learning_rate": 2.733212982351957e-06,
+      "loss": 0.04174878001213074,
+      "mean_token_accuracy": 0.986495653167367,
+      "num_tokens": 64863074.0,
+      "step": 770
+    },
+    {
+      "entropy": 0.04044588297838345,
+      "epoch": 0.8700501952035694,
+      "grad_norm": 0.283203125,
+      "learning_rate": 2.3293363852379125e-06,
+      "loss": 0.04043938219547272,
+      "mean_token_accuracy": 0.9872395290061832,
+      "num_tokens": 65709101.0,
+      "step": 780
+    },
+    {
+      "entropy": 0.04246396276575979,
+      "epoch": 0.8812046848856665,
+      "grad_norm": 0.275390625,
+      "learning_rate": 1.956279997278043e-06,
+      "loss": 0.041996100544929506,
+      "mean_token_accuracy": 0.9866539994254708,
+      "num_tokens": 66553598.0,
+      "step": 790
+    },
+    {
+      "entropy": 0.041515190360951235,
+      "epoch": 0.8923591745677635,
+      "grad_norm": 0.322265625,
+      "learning_rate": 1.6145509794491364e-06,
+      "loss": 0.041551712155342105,
+      "mean_token_accuracy": 0.9867776447907091,
+      "num_tokens": 67405428.0,
+      "step": 800
+    },
+    {
+      "entropy": 0.04269145799044054,
+      "epoch": 0.9035136642498606,
+      "grad_norm": 0.283203125,
+      "learning_rate": 1.3046139039394e-06,
+      "loss": 0.042556726932525636,
+      "mean_token_accuracy": 0.9866596391424537,
+      "num_tokens": 68245631.0,
+      "step": 810
+    },
+    {
+      "entropy": 0.04082234081579372,
+      "epoch": 0.9146681539319577,
+      "grad_norm": 0.30859375,
+      "learning_rate": 1.026890122573998e-06,
+      "loss": 0.04080590307712555,
+      "mean_token_accuracy": 0.987078714184463,
+      "num_tokens": 69069024.0,
+      "step": 820
+    },
+    {
+      "entropy": 0.04289137564774137,
+      "epoch": 0.9258226436140546,
+      "grad_norm": 0.29296875,
+      "learning_rate": 7.817571939976288e-07,
+      "loss": 0.04283967912197113,
+      "mean_token_accuracy": 0.9864464558660984,
+      "num_tokens": 69903803.0,
+      "step": 830
+    },
+    {
+      "entropy": 0.041589401010423896,
+      "epoch": 0.9369771332961517,
+      "grad_norm": 0.2890625,
+      "learning_rate": 5.695483703928306e-07,
+      "loss": 0.04148242473602295,
+      "mean_token_accuracy": 0.9868634788319468,
+      "num_tokens": 70747562.0,
+      "step": 840
+    },
+    {
+      "entropy": 0.04105772517505102,
+      "epoch": 0.9481316229782487,
+      "grad_norm": 0.271484375,
+      "learning_rate": 3.905521444318605e-07,
+      "loss": 0.04133652150630951,
+      "mean_token_accuracy": 0.986898991279304,
+      "num_tokens": 71601623.0,
+      "step": 850
+    },
+    {
+      "entropy": 0.040625668261782266,
+      "epoch": 0.9592861126603458,
+      "grad_norm": 0.322265625,
+      "learning_rate": 2.450118570779786e-07,
+      "loss": 0.04106319844722748,
+      "mean_token_accuracy": 0.9870263114571571,
+      "num_tokens": 72449591.0,
+      "step": 860
+    },
+    {
+      "entropy": 0.041737568736425604,
+      "epoch": 0.9704406023424428,
+      "grad_norm": 0.28515625,
+      "learning_rate": 1.3312536676942377e-07,
+      "loss": 0.04145269989967346,
+      "mean_token_accuracy": 0.9866915429010987,
+      "num_tokens": 73305562.0,
+      "step": 870
+    },
+    {
+      "entropy": 0.04121337772812694,
+      "epoch": 0.9815950920245399,
+      "grad_norm": 0.28515625,
+      "learning_rate": 5.5044780435722923e-08,
+      "loss": 0.04093064665794373,
+      "mean_token_accuracy": 0.986900057643652,
+      "num_tokens": 74143208.0,
+      "step": 880
+    },
+    {
+      "entropy": 0.040503930338309145,
+      "epoch": 0.992749581706637,
+      "grad_norm": 0.287109375,
+      "learning_rate": 1.0876246712074322e-08,
+      "loss": 0.0403281182050705,
+      "mean_token_accuracy": 0.9871442951261997,
+      "num_tokens": 74991508.0,
+      "step": 890
+    },
+    {
+      "epoch": 1.0,
+      "eval_entropy": 0.041321987748146057,
+      "eval_loss": 0.03999410942196846,
+      "eval_mean_token_accuracy": 0.9868998980522156,
+      "eval_num_tokens": 75541075.0,
+      "eval_runtime": 50.0483,
+      "eval_samples_per_second": 19.981,
+      "eval_steps_per_second": 9.99,
+      "step": 897
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 897,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.495254912950835e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
--- a/checkpoint-897/training_args.bin
+++ b/checkpoint-897/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf09ccdfa77c381d3289f0527036b2ded899e83c480f73afe09a2e9f4202aaef
+size 5368
--- a/config.json
+++ b/config.json
@@ -0,0 +1,62 @@
+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "Gemma3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "dtype": "bfloat16",
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 18,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "full_attention": {
+      "rope_theta": 1000000.0,
+      "rope_type": "default"
+    },
+    "sliding_attention": {
+      "rope_theta": 10000.0,
+      "rope_type": "default"
+    }
+  },
+  "sliding_window": 512,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.7.0",
+  "use_bidirectional_attention": false,
+  "use_cache": false,
+  "vocab_size": 262144
+}
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,15 @@
+{
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    1,
+    50,
+    106
+  ],
+  "pad_token_id": 0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "5.7.0"
+}
--- a/model.safetensors
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efbb32f4990b6a076d86e5dc1b83f90236f9f459ac4cdb44d1847818f2d99171
+size 536223056
--- a/nives-fg-270m-v1-F16.gguf
+++ b/nives-fg-270m-v1-F16.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26e0a65d5714008b9c8158a2b79d25de02a2adf05b4c0856ae6e3c191f2e9d31
+size 542848640
--- a/nives-fg-270m-v1-Q4_K_M.gguf
+++ b/nives-fg-270m-v1-Q4_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36263ceda5f394b030cfc8415af615216cb11b76124a9ca25bcf1706f38ced0e
+size 253128320
--- a/runs/May05_17-50-33_d47cde4b1ec4/events.out.tfevents.1778003433.d47cde4b1ec4.11675.0
+++ b/runs/May05_17-50-33_d47cde4b1ec4/events.out.tfevents.1778003433.d47cde4b1ec4.11675.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f048174a168bace4a11eb831c560e96793f0f1f751390d8ca21fea146f6379b7
+size 40488
--- a/tokenizer.json
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80d7f800b949accd7eb940bac75e642f9468e4df157403032a55bf54ed23b650
+size 33384898
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,27 @@
+{
+  "backend": "tokenizers",
+  "boi_token": "<start_of_image>",
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eoi_token": "<end_of_image>",
+  "eos_token": "<eos>",
+  "image_token": "<image_soft_token>",
+  "is_local": false,
+  "local_files_only": false,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "boi_token": "<start_of_image>",
+    "eoi_token": "<end_of_image>",
+    "image_token": "<image_soft_token>",
+    "sfr_token": "<start_function_response>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "sfr_token": "<start_function_response>",
+  "sp_model_kwargs": null,
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
--- a/training_args.bin
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf09ccdfa77c381d3289f0527036b2ded899e83c480f73afe09a2e9f4202aaef
+size 5368