初始化项目,由ModelHub XC社区提供模型
Model: terasut/gkd-qwen-2.5-0.5b-base_v5_from1.5b_eff32 Source: Original Platform
This commit is contained in:
56
checkpoint-2625/chat_template.jinja
Normal file
56
checkpoint-2625/chat_template.jinja
Normal file
@@ -0,0 +1,56 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- messages[0]['content'] }}
|
||||
{%- else %}
|
||||
{{- 'You are a helpful assistant.' }}
|
||||
{%- endif %}
|
||||
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- for message in messages %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" and not message.tool_calls %}
|
||||
{{- '<|im_start|>assistant\n' }}{% generation %}{{- message.content + '<|im_end|>\n' }}{% endgeneration %}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{{- '<|im_start|>assistant' }}{% generation %}
|
||||
{%- if message.content %}
|
||||
{{- '\n' + message.content }}
|
||||
{%- endif %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{{- '<|im_end|>\n' }}{% endgeneration %}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- message.content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- endif %}
|
||||
58
checkpoint-2625/config.json
Normal file
58
checkpoint-2625/config.json
Normal file
@@ -0,0 +1,58 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen2ForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": null,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 896,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 4864,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 32768,
|
||||
"max_window_layers": 24,
|
||||
"model_type": "qwen2",
|
||||
"num_attention_heads": 14,
|
||||
"num_hidden_layers": 24,
|
||||
"num_key_value_heads": 2,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_parameters": {
|
||||
"rope_theta": 1000000.0,
|
||||
"rope_type": "default"
|
||||
},
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"transformers_version": "5.5.3",
|
||||
"use_cache": false,
|
||||
"use_mrope": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
10
checkpoint-2625/generation_config.json
Normal file
10
checkpoint-2625/generation_config.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"do_sample": false,
|
||||
"eos_token_id": [
|
||||
151645
|
||||
],
|
||||
"max_new_tokens": 2048,
|
||||
"pad_token_id": 151643,
|
||||
"transformers_version": "5.5.3",
|
||||
"use_cache": false
|
||||
}
|
||||
3
checkpoint-2625/model.safetensors
Normal file
3
checkpoint-2625/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:10cbf78c24b74c3578136d377abf51eeb08995479f597d62e92ca7c6298a5d05
|
||||
size 988097824
|
||||
3
checkpoint-2625/optimizer.pt
Normal file
3
checkpoint-2625/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:efe47928026abf0fda933a387e0c21813634b64b568a52b39247f08d5a1646fa
|
||||
size 1976378699
|
||||
3
checkpoint-2625/rng_state.pth
Normal file
3
checkpoint-2625/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1bb7e3e0894f95e6b09893239ea7465e0b0f84960c938750037c83d683a7c132
|
||||
size 14645
|
||||
3
checkpoint-2625/scheduler.pt
Normal file
3
checkpoint-2625/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a90cf32bcaa40d2760ac49ca5a2167553d01347c3172274302ea44fcf542f183
|
||||
size 1465
|
||||
3
checkpoint-2625/tokenizer.json
Normal file
3
checkpoint-2625/tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8
|
||||
size 11421892
|
||||
30
checkpoint-2625/tokenizer_config.json
Normal file
30
checkpoint-2625/tokenizer_config.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"backend": "tokenizers",
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"is_local": true,
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "left",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
1868
checkpoint-2625/trainer_state.json
Normal file
1868
checkpoint-2625/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-2625/training_args.bin
Normal file
3
checkpoint-2625/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e8a2ef5b8d79835ff6ba94d10403139a3a9af53e51adfe2987ac1e7ff4f8e1ec
|
||||
size 6289
|
||||
Reference in New Issue
Block a user