初始化项目,由ModelHub XC社区提供模型
Model: wmln/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-strong_wise_gecko Source: Original Platform
This commit is contained in:
233
trainer_state.json
Normal file
233
trainer_state.json
Normal file
@@ -0,0 +1,233 @@
|
||||
{
|
||||
"best_global_step": null,
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 1.0,
|
||||
"eval_steps": 500,
|
||||
"global_step": 20,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"completion_length": 375.875,
|
||||
"epoch": 0.1,
|
||||
"grad_norm": 34.61153793334961,
|
||||
"kl": 0.0,
|
||||
"learning_rate": 4.965903258506806e-07,
|
||||
"loss": 0.0,
|
||||
"reward": 0.3664499084734416,
|
||||
"reward_std": 0.37731685693142936,
|
||||
"rewards/concensus_correctness_reward_func": 0.0,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.0,
|
||||
"rewards/question_recreation_reward_func": 0.24698116456056596,
|
||||
"rewards/soft_format_reward_func": 0.015625,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": 0.10384375043213367,
|
||||
"step": 2
|
||||
},
|
||||
{
|
||||
"completion_length": 356.71875,
|
||||
"epoch": 0.2,
|
||||
"grad_norm": 27.17987060546875,
|
||||
"kl": 0.0014200315781636164,
|
||||
"learning_rate": 4.698684378016222e-07,
|
||||
"loss": 0.0,
|
||||
"reward": 0.44327147863805294,
|
||||
"reward_std": 0.5475165799725801,
|
||||
"rewards/concensus_correctness_reward_func": 0.015687499195337296,
|
||||
"rewards/consensus_reward_func": 0.0625,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.0625,
|
||||
"rewards/question_recreation_reward_func": 0.27499022823758423,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": 0.02759375609457493,
|
||||
"step": 4
|
||||
},
|
||||
{
|
||||
"completion_length": 364.78125,
|
||||
"epoch": 0.3,
|
||||
"grad_norm": 8.681589126586914,
|
||||
"kl": 0.01875807526448625,
|
||||
"learning_rate": 4.193203929064353e-07,
|
||||
"loss": 0.0,
|
||||
"reward": 0.3844468754250556,
|
||||
"reward_std": 0.26774020673474297,
|
||||
"rewards/concensus_correctness_reward_func": 0.0,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.0,
|
||||
"rewards/question_recreation_reward_func": 0.31691562850028276,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": 0.06753125367686152,
|
||||
"step": 6
|
||||
},
|
||||
{
|
||||
"completion_length": 363.6875,
|
||||
"epoch": 0.4,
|
||||
"grad_norm": 76.04808807373047,
|
||||
"kl": 0.008640145704703173,
|
||||
"learning_rate": 3.5042385616324236e-07,
|
||||
"loss": 0.0,
|
||||
"reward": 0.1568439636612311,
|
||||
"reward_std": 0.5598289684858173,
|
||||
"rewards/concensus_correctness_reward_func": 0.0,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.0625,
|
||||
"rewards/question_recreation_reward_func": 0.18931270475150086,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": -0.09496874565957114,
|
||||
"step": 8
|
||||
},
|
||||
{
|
||||
"completion_length": 395.5625,
|
||||
"epoch": 0.5,
|
||||
"grad_norm": 8.20077133178711,
|
||||
"kl": 0.00585838263577898,
|
||||
"learning_rate": 2.706448363680831e-07,
|
||||
"loss": 0.0,
|
||||
"reward": 0.44814145751297474,
|
||||
"reward_std": 0.4613042630953714,
|
||||
"rewards/concensus_correctness_reward_func": 0.0,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.0625,
|
||||
"rewards/question_recreation_reward_func": 0.2386414643842727,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": 0.147000000346452,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"completion_length": 333.625,
|
||||
"epoch": 0.6,
|
||||
"grad_norm": 9.751823425292969,
|
||||
"kl": 0.9474406025801727,
|
||||
"learning_rate": 1.886286282148002e-07,
|
||||
"loss": 0.0009,
|
||||
"reward": 0.49279772784211673,
|
||||
"reward_std": 0.411659850156866,
|
||||
"rewards/concensus_correctness_reward_func": 0.0,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.0625,
|
||||
"rewards/question_recreation_reward_func": 0.3130477310915012,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": 0.11725000198930502,
|
||||
"step": 12
|
||||
},
|
||||
{
|
||||
"completion_length": 257.25,
|
||||
"epoch": 0.7,
|
||||
"grad_norm": 7.342770099639893,
|
||||
"kl": 0.00215080863199546,
|
||||
"learning_rate": 1.1326296046939333e-07,
|
||||
"loss": 0.0,
|
||||
"reward": 0.44488345994614065,
|
||||
"reward_std": 0.318172043771483,
|
||||
"rewards/concensus_correctness_reward_func": 0.0,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.0625,
|
||||
"rewards/question_recreation_reward_func": 0.19450845930259675,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": 0.18787499610334635,
|
||||
"step": 14
|
||||
},
|
||||
{
|
||||
"completion_length": 390.46875,
|
||||
"epoch": 0.8,
|
||||
"grad_norm": 4.781615257263184,
|
||||
"kl": 0.0012646604518522508,
|
||||
"learning_rate": 5.271487265090163e-08,
|
||||
"loss": 0.0,
|
||||
"reward": 0.4826530911959708,
|
||||
"reward_std": 0.7335095015587285,
|
||||
"rewards/concensus_correctness_reward_func": 0.0,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.25,
|
||||
"rewards/question_recreation_reward_func": 0.42434057663194835,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": -0.1916874992166413,
|
||||
"step": 16
|
||||
},
|
||||
{
|
||||
"completion_length": 397.03125,
|
||||
"epoch": 0.9,
|
||||
"grad_norm": 8.631686210632324,
|
||||
"kl": 0.23009972504951293,
|
||||
"learning_rate": 1.3545689574841341e-08,
|
||||
"loss": 0.0002,
|
||||
"reward": 1.1463897689245641,
|
||||
"reward_std": 1.4765515620936185,
|
||||
"rewards/concensus_correctness_reward_func": 0.7297499999403954,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.1875,
|
||||
"rewards/question_recreation_reward_func": 0.2630148070747964,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": -0.03387500322423875,
|
||||
"step": 18
|
||||
},
|
||||
{
|
||||
"completion_length": 307.84375,
|
||||
"epoch": 1.0,
|
||||
"grad_norm": 68.26083374023438,
|
||||
"kl": 3.183381193990499,
|
||||
"learning_rate": 0.0,
|
||||
"loss": 0.0032,
|
||||
"reward": -0.019438669085502625,
|
||||
"reward_std": 0.5264092059223913,
|
||||
"rewards/concensus_correctness_reward_func": 0.0,
|
||||
"rewards/consensus_reward_func": 0.0,
|
||||
"rewards/cumulative_reward_2": 0.0,
|
||||
"rewards/final_correctness_reward_func": 0.0,
|
||||
"rewards/question_recreation_reward_func": 0.16081133193802088,
|
||||
"rewards/soft_format_reward_func": 0.0,
|
||||
"rewards/strict_format_reward_func": 0.0,
|
||||
"rewards/xmlcount_reward_func": -0.18025000300258398,
|
||||
"step": 20
|
||||
},
|
||||
{
|
||||
"epoch": 1.0,
|
||||
"step": 20,
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 0.0004398912084980111,
|
||||
"train_runtime": 308.8876,
|
||||
"train_samples_per_second": 1.036,
|
||||
"train_steps_per_second": 0.065
|
||||
}
|
||||
],
|
||||
"logging_steps": 2,
|
||||
"max_steps": 20,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 1,
|
||||
"save_steps": 25,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": true
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 0.0,
|
||||
"train_batch_size": 2,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
Reference in New Issue
Block a user