Files
Qwen2.5-0.5B-Instruct-Gensy…/trainer_state.json
ModelHub XC a8e4f126ef 初始化项目,由ModelHub XC社区提供模型
Model: Mahdikppp/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-invisible_ravenous_mongoose
Source: Original Platform
2026-05-18 07:56:02 +08:00

444 lines
18 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.975609756097561,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.0,
"completions/max_terminated_length": 4.0,
"completions/mean_length": 4.0,
"completions/mean_terminated_length": 4.0,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0975609756097561,
"frac_reward_zero_std": 0.75,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 5e-07,
"loss": -0.0,
"num_tokens": 2080.0,
"reward": 2.107478141784668,
"reward_std": 0.7532989382743835,
"rewards/concensus_correctness_reward_func/mean": 0.8445000648498535,
"rewards/concensus_correctness_reward_func/std": 0.5630000233650208,
"rewards/consensus_reward_func/mean": 1.25,
"rewards/consensus_reward_func/std": 1.0773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.012978244572877884,
"rewards/question_recreation_reward_func/std": 0.004188001621514559,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 2
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.0,
"completions/max_terminated_length": 4.0,
"completions/mean_length": 3.5,
"completions/mean_terminated_length": 3.5,
"completions/min_length": 3.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.1951219512195122,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.73433053368899e-08,
"kl": 0.000426415354013443,
"learning_rate": 4.864543104251586e-07,
"loss": 0.0,
"num_tokens": 4156.0,
"reward": 1.6496462225914001,
"reward_std": 0.5526039600372314,
"rewards/concensus_correctness_reward_func/mean": 1.140749990940094,
"rewards/concensus_correctness_reward_func/std": 1.647525429725647,
"rewards/consensus_reward_func/mean": 0.5,
"rewards/consensus_reward_func/std": 0.5773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.008896212559193373,
"rewards/question_recreation_reward_func/std": 0.0068460507318377495,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 4
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.0,
"completions/max_terminated_length": 4.0,
"completions/mean_length": 3.5,
"completions/mean_terminated_length": 3.5,
"completions/min_length": 3.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.2926829268292683,
"frac_reward_zero_std": 1.0,
"grad_norm": 6.071777391980504e-08,
"kl": 2.3096799850463867e-06,
"learning_rate": 4.472851273490984e-07,
"loss": 0.0,
"num_tokens": 6232.0,
"reward": 1.2626911401748657,
"reward_std": 0.0,
"rewards/concensus_correctness_reward_func/mean": 0.7299999892711639,
"rewards/concensus_correctness_reward_func/std": 0.8429313898086548,
"rewards/consensus_reward_func/mean": 0.5,
"rewards/consensus_reward_func/std": 0.5773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.03269112668931484,
"rewards/question_recreation_reward_func/std": 0.0033300668001174927,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 42.0,
"completions/max_terminated_length": 42.0,
"completions/mean_length": 20.5,
"completions/mean_terminated_length": 20.5,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.3902439024390244,
"frac_reward_zero_std": 0.75,
"grad_norm": 0.0034936980810016394,
"kl": 0.09646054328186437,
"learning_rate": 3.867370395306068e-07,
"loss": 0.0001,
"num_tokens": 8444.0,
"reward": 4.392500638961792,
"reward_std": 0.011391956359148026,
"rewards/concensus_correctness_reward_func/mean": 2.6324999928474426,
"rewards/concensus_correctness_reward_func/std": 0.7765360977500677,
"rewards/consensus_reward_func/mean": 1.5,
"rewards/consensus_reward_func/std": 0.5773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.011250502662733197,
"rewards/question_recreation_reward_func/std": 0.00992788770236075,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.2487500011920929,
"rewards/xmlcount_reward_func/std": 0.28787195682525635,
"step": 8
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.0,
"completions/max_terminated_length": 4.0,
"completions/mean_length": 4.0,
"completions/mean_terminated_length": 4.0,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.4878048780487805,
"frac_reward_zero_std": 1.0,
"grad_norm": 3.545432392115799e-08,
"kl": 0.010027587413787842,
"learning_rate": 3.1137137178519977e-07,
"loss": 0.0,
"num_tokens": 10524.0,
"reward": 2.497630000114441,
"reward_std": 0.0,
"rewards/concensus_correctness_reward_func/mean": 0.9599999785423279,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 1.5,
"rewards/consensus_reward_func/std": 0.5773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.03762998804450035,
"rewards/question_recreation_reward_func/std": 0.0033961788285523653,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 10
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.0,
"completions/max_terminated_length": 4.0,
"completions/mean_length": 4.0,
"completions/mean_terminated_length": 4.0,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.5853658536585366,
"frac_reward_zero_std": 1.0,
"grad_norm": 5.641782152565611e-09,
"kl": 0.04576372355222702,
"learning_rate": 2.2935516363191693e-07,
"loss": 0.0,
"num_tokens": 12604.0,
"reward": 4.48307740688324,
"reward_std": 0.0,
"rewards/concensus_correctness_reward_func/mean": 2.940999984741211,
"rewards/concensus_correctness_reward_func/std": 1.1304517984390259,
"rewards/consensus_reward_func/mean": 1.5,
"rewards/consensus_reward_func/std": 0.5773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.04207756486721337,
"rewards/question_recreation_reward_func/std": 0.0,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 12
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.5,
"completions/max_terminated_length": 4.5,
"completions/mean_length": 4.125,
"completions/mean_terminated_length": 4.125,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.6829268292682927,
"frac_reward_zero_std": 0.75,
"grad_norm": 223.39295959472656,
"kl": 0.20429373532533646,
"learning_rate": 1.4957614383675767e-07,
"loss": 0.0002,
"num_tokens": 14685.0,
"reward": 1.2401338815689087,
"reward_std": 0.34923410415649414,
"rewards/concensus_correctness_reward_func/mean": 0.4790000021457672,
"rewards/concensus_correctness_reward_func/std": 0.5531015992164612,
"rewards/consensus_reward_func/mean": 0.75,
"rewards/consensus_reward_func/std": 1.0773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.011133914813399315,
"rewards/question_recreation_reward_func/std": 0.005399489309638739,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 14
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.0,
"completions/max_terminated_length": 4.0,
"completions/mean_length": 4.0,
"completions/mean_terminated_length": 4.0,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.7804878048780488,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.00020408314594533294,
"kl": 0.20127877593040466,
"learning_rate": 8.067960709356478e-08,
"loss": 0.0002,
"num_tokens": 16765.0,
"reward": 3.500740647315979,
"reward_std": 0.0,
"rewards/concensus_correctness_reward_func/mean": 1.9600000381469727,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 1.5,
"rewards/consensus_reward_func/std": 0.5773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.04074074048548937,
"rewards/question_recreation_reward_func/std": 0.0,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 16
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.0,
"completions/max_terminated_length": 4.0,
"completions/mean_length": 4.0,
"completions/mean_terminated_length": 4.0,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.8780487804878049,
"frac_reward_zero_std": 1.0,
"grad_norm": 7.515780886535595e-09,
"kl": 0.0,
"learning_rate": 3.013156219837776e-08,
"loss": 0.0,
"num_tokens": 18845.0,
"reward": 3.4462019205093384,
"reward_std": 0.0,
"rewards/concensus_correctness_reward_func/mean": 1.9130000472068787,
"rewards/concensus_correctness_reward_func/std": 0.0519615113735199,
"rewards/consensus_reward_func/mean": 1.5,
"rewards/consensus_reward_func/std": 0.5773502588272095,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.03320199949666858,
"rewards/question_recreation_reward_func/std": 0.0,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 18
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 4.0,
"completions/max_terminated_length": 4.0,
"completions/mean_length": 4.0,
"completions/mean_terminated_length": 4.0,
"completions/min_length": 4.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.975609756097561,
"frac_reward_zero_std": 1.0,
"grad_norm": 2.6966833210906316e-09,
"kl": 0.32504162192344666,
"learning_rate": 3.4096741493194193e-09,
"loss": 0.0003,
"num_tokens": 20925.0,
"reward": 4.6496992111206055,
"reward_std": 0.0,
"rewards/concensus_correctness_reward_func/mean": 3.6059999465942383,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 1.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.0436993483453989,
"rewards/question_recreation_reward_func/std": 0.0,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.0,
"rewards/xmlcount_reward_func/std": 0.0,
"step": 20
},
{
"epoch": 0.975609756097561,
"step": 20,
"total_flos": 0.0,
"train_loss": 8.8329258843034e-05,
"train_runtime": 703.5963,
"train_samples_per_second": 0.114,
"train_steps_per_second": 0.028
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 20925,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}