From f2d87c32c34a3400211092611376bb50147e2e20 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 26 Apr 2026 20:54:43 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: linsong8208/trainer_output Source: Original Platform --- .gitattributes | 36 +++ README.md | 58 ++++ chat_template.jinja | 279 ++++++++++++++++++ config.json | 62 ++++ generation_config.json | 15 + model.safetensors | 3 + ....out.tfevents.1776516340.cb133b5fdde5.55.0 | 3 + ....out.tfevents.1776916194.7626ce86e78e.55.0 | 3 + ....out.tfevents.1776916358.7626ce86e78e.55.1 | 3 + ....out.tfevents.1776947097.78765439a4f5.55.0 | 3 + tokenizer.json | 3 + tokenizer_config.json | 26 ++ training_args.bin | 3 + 13 files changed, 497 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 runs/Apr18_12-45-40_cb133b5fdde5/events.out.tfevents.1776516340.cb133b5fdde5.55.0 create mode 100644 runs/Apr23_03-49-54_7626ce86e78e/events.out.tfevents.1776916194.7626ce86e78e.55.0 create mode 100644 runs/Apr23_03-52-38_7626ce86e78e/events.out.tfevents.1776916358.7626ce86e78e.55.1 create mode 100644 runs/Apr23_12-24-57_78765439a4f5/events.out.tfevents.1776947097.78765439a4f5.55.0 create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..5c36f55 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +--- +base_model: google/functiongemma-270m-it +library_name: transformers +model_name: trainer_output +tags: +- generated_from_trainer +- trl +- sft +licence: license +--- + +# Model Card for trainer_output + +This model is a fine-tuned version of [google/functiongemma-270m-it](https://huggingface.co/google/functiongemma-270m-it). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="linsong8208/trainer_output", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + + + + + +This model was trained with SFT. + +### Framework versions + +- TRL: 1.2.0 +- Transformers: 5.0.0 +- Pytorch: 2.10.0+cu128 +- Datasets: 4.8.3 +- Tokenizers: 0.22.2 + +## Citations + + + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..1629479 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,279 @@ +{%- macro format_parameters(properties, required) -%} + {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%} + {%- set ns = namespace(found_first=false) -%} + {%- for key, value in properties | dictsort -%} + {%- if key not in standard_keys -%} + {%- if ns.found_first %},{% endif -%} + {%- set ns.found_first = true -%} + {{- key }}:{description:{{ value['description'] }} + {%- if value['type'] | upper == 'STRING' -%} + {%- if value['enum'] -%} + ,enum:{{ format_argument(value['enum']) }} + {%- endif -%} + {%- elif value['type'] | upper == 'OBJECT' -%} + ,properties:{ + {%- if value['properties'] is defined and value['properties'] is mapping -%} + {{- format_parameters(value['properties'], value['required'] | default([])) -}} + {%- elif value is mapping -%} + {{- format_parameters(value, value['required'] | default([])) -}} + {%- endif -%} + } + {%- if value['required'] -%} + ,required:[ + {%- for item in value['required'] | default([]) -%} + {{- item -}} + {%- if not loop.last %},{% endif -%} + {%- endfor -%} + ] + {%- endif -%} + {%- elif value['type'] | upper == 'ARRAY' -%} + {%- if value['items'] is mapping and value['items'] -%} + ,items:{ + {%- set ns_items = namespace(found_first=false) -%} + {%- for item_key, item_value in value['items'] | dictsort -%} + {%- if item_value is not none -%} + {%- if ns_items.found_first %},{% endif -%} + {%- set ns_items.found_first = true -%} + {%- if item_key == 'properties' -%} + properties:{ + {%- if item_value is mapping -%} + {{- format_parameters(item_value, value['items']['required'] | default([])) -}} + {%- endif -%} + } + {%- elif item_key == 'required' -%} + required:[ + {%- for req_item in item_value -%} + {{- req_item -}} + {%- if not loop.last %},{% endif -%} + {%- endfor -%} + ] + {%- elif item_key == 'type' -%} + {%- if item_value is string -%} + type:{{ format_argument(item_value | upper) }} + {%- else -%} + type:{{ format_argument(item_value | map('upper') | list) }} + {%- endif -%} + {%- else -%} + {{ item_key }}:{{ format_argument(item_value) }} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + } + {%- endif -%} + {%- endif -%} + ,type:{{ value['type'] | upper }}} + {%- endif -%} + {%- endfor -%} +{%- endmacro -%} +{% macro format_function_declaration(tool_data) -%} +declaration:{{- tool_data['function']['name'] -}} +{description:{{- tool_data['function']['description'] -}} +{%- set params = tool_data['function']['parameters'] -%} +{%- if params -%} + ,parameters:{ + {%- if params['properties'] -%} + properties:{ {{- format_parameters(params['properties'], params['required']) -}} }, + {%- endif -%} + {%- if params['required'] -%} + required:[ + {%- for item in params['required'] -%} + {{- item -}} + {{- ',' if not loop.last -}} + {%- endfor -%} + ], + {%- endif -%} + {%- if params['type'] -%} + type:{{- params['type'] | upper -}}} + {%- endif -%} +{%- endif -%} +} +{%- endmacro -%} +{% macro format_argument(argument, escape_keys=True) -%} +{%- if argument is string -%} + {{- '' + argument + '' -}} +{%- elif argument is boolean -%} + {%- if argument -%} + {{- 'true' -}} + {%- else -%} + {{- 'false' -}} + {%- endif -%} +{%- elif argument is mapping -%} + {{- '{' -}} + {%- set ns = namespace(found_first=false) -%} + {%- for key, value in argument | dictsort -%} + {%- if ns.found_first %},{% endif -%} + {%- set ns.found_first = true -%} + {%- if escape_keys -%} + {{- '' + key + '' -}} + {%- else -%} + {{- key -}} + {%- endif -%} + :{{- format_argument(value, escape_keys=escape_keys) -}} + {%- endfor -%} + {{- '}' -}} +{%- elif argument is sequence -%} + {{- '[' -}} + {%- for item in argument -%} + {{- format_argument(item, escape_keys=escape_keys) -}} + {%- if not loop.last %},{% endif -%} + {%- endfor -%} + {{- ']' -}} +{%- else -%} + {{- argument -}} +{%- endif -%} +{%- endmacro -%} +{{ bos_token }} +{%- set ns = namespace(prev_message_type=None) -%} +{#- Tool Declarations -#} +{%- set loop_messages = messages -%} +{%- if tools or messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%} + {{- 'developer\n' -}} + {%- if messages[0]['role'] == 'system' or messages[0]['role'] == 'developer' -%} + {%- if messages[0]['content'] is string -%} + {{- messages[0]['content'] | trim -}} + {%- elif messages[0]['content'] is sequence -%} + {%- for item in messages[0]['content'] -%} + {%- if item['type'] == 'text' -%} + {{- item['text'] | trim -}} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + {%- set loop_messages = messages[1:] -%} + {%- endif -%} + {%- if tools -%} + {%- for tool in tools %} + {{- '' -}} + {{- format_function_declaration(tool) | trim }} + {{- '' -}} + {%- endfor %} + {%- endif -%} + {{- '\n' }} +{%- endif %} +{#- Loop through messages. -#} +{%- for message in loop_messages -%} + {%- if (message['role'] == 'assistant') -%} + {#- Rename "assistant" to "model". -#} + {%- set role = "model" -%} + {%- else -%} + {%- set role = message['role'] -%} + {%- endif -%} + {%- if role != 'tool' -%} + {%- if ns.prev_message_type != 'tool_response' -%} + {{- '' + role + '\n' }} + {%- endif -%} + {%- set ns.prev_message_type = None -%} + {%- if 'content' in message and message['content'] is not none -%} + {%- if message['content'] is string -%} + {{ message['content'] | trim }} + {%- elif message['content'] is sequence -%} + {%- for item in message['content'] -%} + {%- if item['type'] == 'image' -%} + {{ '' }} + {%- elif item['type'] == 'text' -%} + {{ item['text'] | trim }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ raise_exception("Invalid content type in user/assistant message") }} + {%- endif -%} + {%- set ns.prev_message_type = 'content' -%} + {%- endif -%} + {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls'] is iterable -%} + {#- Tool Calls -#} + {%- for tool_call in message['tool_calls'] -%} + {% set function = tool_call['function'] %} + {{- 'call:' + function['name'] + '{' -}} + {%- if 'arguments' in function -%} + {%- if function['arguments'] is mapping -%} + {%- set ns = namespace(found_first=false) -%} + {%- for key, value in function['arguments'] | dictsort -%} + {%- if ns.found_first %},{% endif -%} + {%- set ns.found_first = true -%} + {{- key -}}:{{- format_argument(value, escape_keys=False) -}} + {%- endfor -%} + {%- elif function['arguments'] is string -%} + {# This handles string-JSON, just in case #} + {{ function['arguments'] }} + {%- endif %} + {%- endif -%} + {{- '}' -}} + {%- endfor -%} + {%- if loop.last -%} + {{ '' }} + {%- endif -%} + {%- set ns.prev_message_type = 'tool_call' -%} + {%- endif -%} + {%- else -%} + {#- Tool Responses -#} + {%- if 'content' in message and message['content'] -%} + {%- if message['content'] is mapping -%} + {%- if 'name' in message['content'] and 'response' in message['content'] -%} + {{ 'response:' + message['content']['name'] | trim + '{' }} + {%- set response_ns = namespace(found_first=false) -%} + {%- for key, value in message['content']['response'] | dictsort -%} + {%- if response_ns.found_first %},{% endif -%} + {%- set response_ns.found_first = true -%} + {{- key -}}:{{- format_argument(value, escape_keys=False) -}} + {%- endfor -%} + {{- '}' -}} + {%- elif 'name' in message -%} + {{ 'response:' + message['name'] | trim + '{' }} + {%- set response_ns = namespace(found_first=false) -%} + {%- for key, value in message['content'] | dictsort -%} + {%- if response_ns.found_first %},{% endif -%} + {%- set response_ns.found_first = true -%} + {{- key -}}:{{- format_argument(value, escape_keys=False) -}} + {%- endfor -%} + {{- '}' -}} + {%- else -%} + {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }} + {%- endif -%} + {%- elif message['content'] is string -%} + {%- if 'name' in message -%} + {{ 'response:' + message['name'] | trim + '{value:' + format_argument(message['content'], escape_keys=False) + '}' }} + {%- else -%} + {{ raise_exception("Invalid tool response: 'name' must be provided.") }} + {%- endif -%} + {%- elif message['content'] is sequence -%} + {%- for item in message['content'] -%} + {%- if item is mapping -%} + {%- if 'name' in item and 'response' in item -%} + {{ 'response:' + item['name'] | trim + '{' }} + {%- set response_ns = namespace(found_first=false) -%} + {%- for key, value in item['response'] | dictsort -%} + {%- if response_ns.found_first %},{% endif -%} + {%- set response_ns.found_first = true -%} + {{- key -}}:{{- format_argument(value, escape_keys=False) -}} + {%- endfor -%} + {{- '}' -}} + {%- elif 'name' in message -%} + {{ 'response:' + message['name'] | trim + '{' }} + {%- set response_ns = namespace(found_first=false) -%} + {%- for key, value in item | dictsort -%} + {%- if response_ns.found_first %},{% endif -%} + {%- set response_ns.found_first = true -%} + {{- key -}}:{{- format_argument(value, escape_keys=False) -}} + {%- endfor -%} + {{- '}' -}} + {%- else -%} + {{ raise_exception("Invalid tool response mapping: must contain 'name' and 'response' keys, or 'name' must be in the message.") }} + {%- endif -%} + {%- else -%} + {{ raise_exception("Invalid tool response message: multiple responses must all be mappings") }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ raise_exception("Invalid content type in tool message: must be mapping, sequence of mappings, or string.") }} + {%- endif -%} + {%- endif -%} + {%- set ns.prev_message_type = 'tool_response' -%} + {%- endif -%} + {%- if ns.prev_message_type not in ['tool_call', 'tool_response'] -%} + {{ '\n' }} + {%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + {%- if ns.prev_message_type != 'tool_response' -%} + {{- 'model\n' -}} + {%- endif -%} +{%- endif -%} diff --git a/config.json b/config.json new file mode 100644 index 0000000..436e93d --- /dev/null +++ b/config.json @@ -0,0 +1,62 @@ +{ + "_sliding_window_pattern": 6, + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "bos_token_id": 2, + "dtype": "bfloat16", + "eos_token_id": 1, + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "full_attention": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_attention": { + "rope_theta": 10000.0, + "rope_type": "default" + } + }, + "sliding_window": 512, + "tie_word_embeddings": true, + "transformers_version": "5.0.0", + "use_bidirectional_attention": false, + "use_cache": false, + "vocab_size": 262144 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..bfdbec2 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,15 @@ +{ + "bos_token_id": 2, + "cache_implementation": "hybrid", + "do_sample": true, + "eos_token_id": [ + 1, + 1, + 50, + 106 + ], + "pad_token_id": 0, + "top_k": 64, + "top_p": 0.95, + "transformers_version": "5.0.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..a1b6c56 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e062f35d7607084da92a1c60fcd040d1400a3e8b42a9bb318620c1f78c8f1ff3 +size 536223056 diff --git a/runs/Apr18_12-45-40_cb133b5fdde5/events.out.tfevents.1776516340.cb133b5fdde5.55.0 b/runs/Apr18_12-45-40_cb133b5fdde5/events.out.tfevents.1776516340.cb133b5fdde5.55.0 new file mode 100644 index 0000000..922c151 --- /dev/null +++ b/runs/Apr18_12-45-40_cb133b5fdde5/events.out.tfevents.1776516340.cb133b5fdde5.55.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e122b6d9b53b8ddfe0066c5e1bb76e2cf214004022b4c271f74a45aa30c76536 +size 18461 diff --git a/runs/Apr23_03-49-54_7626ce86e78e/events.out.tfevents.1776916194.7626ce86e78e.55.0 b/runs/Apr23_03-49-54_7626ce86e78e/events.out.tfevents.1776916194.7626ce86e78e.55.0 new file mode 100644 index 0000000..bd34625 --- /dev/null +++ b/runs/Apr23_03-49-54_7626ce86e78e/events.out.tfevents.1776916194.7626ce86e78e.55.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3990bc035bb2d488ea49ad22cfe36cc129e3c7d89307d0658c50179c63f06cec +size 18461 diff --git a/runs/Apr23_03-52-38_7626ce86e78e/events.out.tfevents.1776916358.7626ce86e78e.55.1 b/runs/Apr23_03-52-38_7626ce86e78e/events.out.tfevents.1776916358.7626ce86e78e.55.1 new file mode 100644 index 0000000..a95bf75 --- /dev/null +++ b/runs/Apr23_03-52-38_7626ce86e78e/events.out.tfevents.1776916358.7626ce86e78e.55.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c101b300e217bcdca2c88120ddf17f66a892b98929af72eeae293e8e3038dc +size 18461 diff --git a/runs/Apr23_12-24-57_78765439a4f5/events.out.tfevents.1776947097.78765439a4f5.55.0 b/runs/Apr23_12-24-57_78765439a4f5/events.out.tfevents.1776947097.78765439a4f5.55.0 new file mode 100644 index 0000000..46aaec9 --- /dev/null +++ b/runs/Apr23_12-24-57_78765439a4f5/events.out.tfevents.1776947097.78765439a4f5.55.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04c2193189bafe0a6d06a5292f61b6c102c04bc99edd0b569402c46aa310092d +size 18461 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..f2524b9 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3655797f9d732b7dc08b4225200697af8e37d94b74711d9b1d8166feb953578 +size 33384774 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..e1a3c37 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,26 @@ +{ + "backend": "tokenizers", + "boi_token": "", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eoi_token": "", + "eos_token": "", + "image_token": "", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "boi_token": "", + "eoi_token": "", + "image_token": "", + "sfr_token": "" + }, + "pad_token": "", + "padding_side": "left", + "sfr_token": "", + "sp_model_kwargs": null, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..42208c1 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6d35d8a5fa376ea51d4a8982db2ed0c5b1841e4cce05d3fa443ab9c8f637f46 +size 5649