初始化项目,由ModelHub XC社区提供模型
Model: mremila/Llama-3.1-8B-precise_if Source: Original Platform
This commit is contained in:
109
checkpoint-118/chat_template.jinja
Normal file
109
checkpoint-118/chat_template.jinja
Normal file
@@ -0,0 +1,109 @@
|
||||
{{- bos_token }}
|
||||
{%- if custom_tools is defined %}
|
||||
{%- set tools = custom_tools %}
|
||||
{%- endif %}
|
||||
{%- if not tools_in_user_message is defined %}
|
||||
{%- set tools_in_user_message = true %}
|
||||
{%- endif %}
|
||||
{%- if not date_string is defined %}
|
||||
{%- set date_string = "26 Jul 2024" %}
|
||||
{%- endif %}
|
||||
{%- if not tools is defined %}
|
||||
{%- set tools = none %}
|
||||
{%- endif %}
|
||||
|
||||
{#- This block extracts the system message, so we can slot it into the right place. #}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{%- set system_message = messages[0]['content']|trim %}
|
||||
{%- set messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{%- set system_message = "" %}
|
||||
{%- endif %}
|
||||
|
||||
{#- System message + builtin tools #}
|
||||
{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
|
||||
{%- if builtin_tools is defined or tools is not none %}
|
||||
{{- "Environment: ipython\n" }}
|
||||
{%- endif %}
|
||||
{%- if builtin_tools is defined %}
|
||||
{{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
|
||||
{%- endif %}
|
||||
{{- "Cutting Knowledge Date: December 2023\n" }}
|
||||
{{- "Today Date: " + date_string + "\n\n" }}
|
||||
{%- if tools is not none and not tools_in_user_message %}
|
||||
{{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
|
||||
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
|
||||
{{- "Do not use variables.\n\n" }}
|
||||
{%- for t in tools %}
|
||||
{{- t | tojson(indent=4) }}
|
||||
{{- "\n\n" }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- system_message }}
|
||||
{{- "<|eot_id|>" }}
|
||||
|
||||
{#- Custom tools are passed in a user message with some extra guidance #}
|
||||
{%- if tools_in_user_message and not tools is none %}
|
||||
{#- Extract the first user message so we can plug it in here #}
|
||||
{%- if messages | length != 0 %}
|
||||
{%- set first_user_message = messages[0]['content']|trim %}
|
||||
{%- set messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
|
||||
{%- endif %}
|
||||
{{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
|
||||
{{- "Given the following functions, please respond with a JSON for a function call " }}
|
||||
{{- "with its proper arguments that best answers the given prompt.\n\n" }}
|
||||
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
|
||||
{{- "Do not use variables.\n\n" }}
|
||||
{%- for t in tools %}
|
||||
{{- t | tojson(indent=4) }}
|
||||
{{- "\n\n" }}
|
||||
{%- endfor %}
|
||||
{{- first_user_message + "<|eot_id|>"}}
|
||||
{%- endif %}
|
||||
|
||||
{%- for message in messages %}
|
||||
{%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
|
||||
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
|
||||
{%- elif 'tool_calls' in message %}
|
||||
{%- if not message.tool_calls|length == 1 %}
|
||||
{{- raise_exception("This model only supports single tool-calls at once!") }}
|
||||
{%- endif %}
|
||||
{%- set tool_call = message.tool_calls[0].function %}
|
||||
{%- if builtin_tools is defined and tool_call.name in builtin_tools %}
|
||||
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
|
||||
{{- "<|python_tag|>" + tool_call.name + ".call(" }}
|
||||
{%- for arg_name, arg_val in tool_call.arguments | items %}
|
||||
{{- arg_name + '="' + arg_val + '"' }}
|
||||
{%- if not loop.last %}
|
||||
{{- ", " }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{- ")" }}
|
||||
{%- else %}
|
||||
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
|
||||
{{- '{"name": "' + tool_call.name + '", ' }}
|
||||
{{- '"parameters": ' }}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{{- "}" }}
|
||||
{%- endif %}
|
||||
{%- if builtin_tools is defined %}
|
||||
{#- This means we're in ipython mode #}
|
||||
{{- "<|eom_id|>" }}
|
||||
{%- else %}
|
||||
{{- "<|eot_id|>" }}
|
||||
{%- endif %}
|
||||
{%- elif message.role == "tool" or message.role == "ipython" %}
|
||||
{{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
|
||||
{%- if message.content is mapping or message.content is iterable %}
|
||||
{{- message.content | tojson }}
|
||||
{%- else %}
|
||||
{{- message.content }}
|
||||
{%- endif %}
|
||||
{{- "<|eot_id|>" }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
|
||||
{%- endif %}
|
||||
36
checkpoint-118/config.json
Normal file
36
checkpoint-118/config.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 128000,
|
||||
"dtype": "float32",
|
||||
"eos_token_id": 128009,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 131072,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 128009,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_parameters": {
|
||||
"factor": 8.0,
|
||||
"high_freq_factor": 4.0,
|
||||
"low_freq_factor": 1.0,
|
||||
"original_max_position_embeddings": 8192,
|
||||
"rope_theta": 500000.0,
|
||||
"rope_type": "llama3"
|
||||
},
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "5.3.0",
|
||||
"use_cache": false,
|
||||
"vocab_size": 128256
|
||||
}
|
||||
13
checkpoint-118/generation_config.json
Normal file
13
checkpoint-118/generation_config.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 128000,
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
128009,
|
||||
128001
|
||||
],
|
||||
"pad_token_id": 128009,
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.9,
|
||||
"transformers_version": "5.3.0"
|
||||
}
|
||||
3
checkpoint-118/model.safetensors
Normal file
3
checkpoint-118/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b9d370d571c5125b36a8ab786c422444f79f4eaff9b5bfbde3cf27cf6260d751
|
||||
size 32121079032
|
||||
3
checkpoint-118/optimizer.bin
Normal file
3
checkpoint-118/optimizer.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e3d2bb67abad04f6bb85058ca19855f287c17ca69c385e4dee8a4ea84b49199b
|
||||
size 64242369179
|
||||
3
checkpoint-118/pytorch_model_fsdp.bin
Normal file
3
checkpoint-118/pytorch_model_fsdp.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:341383a19e39298341c9e1cbbfdd3e32a15d9cf1e15711efff0c85e4a210c9c7
|
||||
size 32121192148
|
||||
3
checkpoint-118/rng_state_0.pth
Normal file
3
checkpoint-118/rng_state_0.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:659b1cdee2219458dd84ce6a632a595465680b8080e5c44bd600ff97eca8d752
|
||||
size 15429
|
||||
3
checkpoint-118/rng_state_1.pth
Normal file
3
checkpoint-118/rng_state_1.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:86accf27064cdd503053e90476a6bd10de333d4ff0594535ad55ea13a473c91d
|
||||
size 15429
|
||||
3
checkpoint-118/rng_state_2.pth
Normal file
3
checkpoint-118/rng_state_2.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:18ca8d714ef40be035404c1957b5a4dee84e1f43980408393f8aa710552ee6f6
|
||||
size 15429
|
||||
3
checkpoint-118/rng_state_3.pth
Normal file
3
checkpoint-118/rng_state_3.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2cfdebe99e40accc9c9d8f09c63136a14abda997d9b501969ec8e16e9d183179
|
||||
size 15429
|
||||
3
checkpoint-118/scheduler.pt
Normal file
3
checkpoint-118/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5e972c3b5e5b3776f0182f4d6b7133009ac64bb40a87dc0cc35d9331c704607b
|
||||
size 1465
|
||||
3
checkpoint-118/tokenizer.json
Normal file
3
checkpoint-118/tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
|
||||
size 17209920
|
||||
14
checkpoint-118/tokenizer_config.json
Normal file
14
checkpoint-118/tokenizer_config.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"backend": "tokenizers",
|
||||
"bos_token": "<|begin_of_text|>",
|
||||
"clean_up_tokenization_spaces": true,
|
||||
"eos_token": "<|eot_id|>",
|
||||
"is_local": false,
|
||||
"model_input_names": [
|
||||
"input_ids",
|
||||
"attention_mask"
|
||||
],
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|eot_id|>",
|
||||
"tokenizer_class": "TokenizersBackend"
|
||||
}
|
||||
144
checkpoint-118/trainer_state.json
Normal file
144
checkpoint-118/trainer_state.json
Normal file
@@ -0,0 +1,144 @@
|
||||
{
|
||||
"best_global_step": null,
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 1.0,
|
||||
"eval_steps": 500,
|
||||
"global_step": 118,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"entropy": 1.6852783646434546,
|
||||
"epoch": 0.08537886872998933,
|
||||
"grad_norm": 0.539056658744812,
|
||||
"learning_rate": 9.56140350877193e-06,
|
||||
"loss": 1.5916325569152832,
|
||||
"mean_token_accuracy": 0.6394169898703694,
|
||||
"num_tokens": 1036987.0,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"entropy": 1.3057927396148443,
|
||||
"epoch": 0.17075773745997866,
|
||||
"grad_norm": 0.09388110786676407,
|
||||
"learning_rate": 8.68421052631579e-06,
|
||||
"loss": 1.1732550621032716,
|
||||
"mean_token_accuracy": 0.6894607817754149,
|
||||
"num_tokens": 2099721.0,
|
||||
"step": 20
|
||||
},
|
||||
{
|
||||
"entropy": 1.24310187920928,
|
||||
"epoch": 0.256136606189968,
|
||||
"grad_norm": 0.08361112326383591,
|
||||
"learning_rate": 7.80701754385965e-06,
|
||||
"loss": 1.083221435546875,
|
||||
"mean_token_accuracy": 0.7096419665962458,
|
||||
"num_tokens": 3148066.0,
|
||||
"step": 30
|
||||
},
|
||||
{
|
||||
"entropy": 1.2204767568036914,
|
||||
"epoch": 0.3415154749199573,
|
||||
"grad_norm": 0.06735046952962875,
|
||||
"learning_rate": 6.92982456140351e-06,
|
||||
"loss": 1.0362739562988281,
|
||||
"mean_token_accuracy": 0.7181693298742176,
|
||||
"num_tokens": 4198507.0,
|
||||
"step": 40
|
||||
},
|
||||
{
|
||||
"entropy": 1.1896080307662487,
|
||||
"epoch": 0.42689434364994666,
|
||||
"grad_norm": 0.05402417853474617,
|
||||
"learning_rate": 6.0526315789473685e-06,
|
||||
"loss": 1.0045047760009767,
|
||||
"mean_token_accuracy": 0.7199778087437153,
|
||||
"num_tokens": 5240508.0,
|
||||
"step": 50
|
||||
},
|
||||
{
|
||||
"entropy": 1.136293525248766,
|
||||
"epoch": 0.512273212379936,
|
||||
"grad_norm": 0.04568689689040184,
|
||||
"learning_rate": 5.175438596491229e-06,
|
||||
"loss": 0.9714550018310547,
|
||||
"mean_token_accuracy": 0.7258936163038016,
|
||||
"num_tokens": 6301215.0,
|
||||
"step": 60
|
||||
},
|
||||
{
|
||||
"entropy": 1.110643889568746,
|
||||
"epoch": 0.5976520811099253,
|
||||
"grad_norm": 0.04777698218822479,
|
||||
"learning_rate": 4.298245614035088e-06,
|
||||
"loss": 0.9732734680175781,
|
||||
"mean_token_accuracy": 0.7243976121768355,
|
||||
"num_tokens": 7347766.0,
|
||||
"step": 70
|
||||
},
|
||||
{
|
||||
"entropy": 1.0825907880440355,
|
||||
"epoch": 0.6830309498399146,
|
||||
"grad_norm": 0.04266300052404404,
|
||||
"learning_rate": 3.421052631578948e-06,
|
||||
"loss": 0.9640823364257812,
|
||||
"mean_token_accuracy": 0.7246530564501882,
|
||||
"num_tokens": 8414578.0,
|
||||
"step": 80
|
||||
},
|
||||
{
|
||||
"entropy": 1.0757255567237736,
|
||||
"epoch": 0.768409818569904,
|
||||
"grad_norm": 0.06162378564476967,
|
||||
"learning_rate": 2.5438596491228075e-06,
|
||||
"loss": 0.9552610397338868,
|
||||
"mean_token_accuracy": 0.7269493261352181,
|
||||
"num_tokens": 9448833.0,
|
||||
"step": 90
|
||||
},
|
||||
{
|
||||
"entropy": 1.07811812851578,
|
||||
"epoch": 0.8537886872998933,
|
||||
"grad_norm": 0.044764406979084015,
|
||||
"learning_rate": 1.6666666666666667e-06,
|
||||
"loss": 0.9457586288452149,
|
||||
"mean_token_accuracy": 0.7318377941846848,
|
||||
"num_tokens": 10496356.0,
|
||||
"step": 100
|
||||
},
|
||||
{
|
||||
"entropy": 1.0599956944584847,
|
||||
"epoch": 0.9391675560298826,
|
||||
"grad_norm": 0.047913916409015656,
|
||||
"learning_rate": 7.894736842105263e-07,
|
||||
"loss": 0.9395035743713379,
|
||||
"mean_token_accuracy": 0.7343964511528611,
|
||||
"num_tokens": 11547813.0,
|
||||
"step": 110
|
||||
}
|
||||
],
|
||||
"logging_steps": 10,
|
||||
"max_steps": 118,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 1,
|
||||
"save_steps": 500,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": true
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 1.8750638152640102e+17,
|
||||
"train_batch_size": 2,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
3
checkpoint-118/training_args.bin
Normal file
3
checkpoint-118/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:44c0af6138a7643901e805c84ec6f92c4fed3eef43ef5d9b9864f9e7d01680ab
|
||||
size 6097
|
||||
Reference in New Issue
Block a user