Files
ModelHub XC 9c46ef56a1 初始化项目,由ModelHub XC社区提供模型
Model: harsha070/expfinal-qwen-island-s42-lambda-0p25
Source: Original Platform
2026-06-16 07:51:18 +08:00

483 lines
17 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.28,
"eval_steps": 500,
"global_step": 160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 959.3,
"completions/max_terminated_length": 883.0,
"completions/mean_length": 628.5875,
"completions/mean_terminated_length": 604.4089508056641,
"completions/min_length": 324.2,
"completions/min_terminated_length": 324.2,
"entropy": 0.8960087668150664,
"epoch": 0.08,
"frac_reward_zero_std": 0.225,
"grad_norm": 1.734375,
"kl": 0.04410275101472507,
"learning_rate": 9.4375e-06,
"loss": -0.004480601102113724,
"num_tokens": 70931.0,
"reward": 0.637615966796875,
"reward_std": 0.4684956520795822,
"rewards/JointRewardFunction/mean": 0.637615966796875,
"rewards/JointRewardFunction/std": 0.4684956640005112,
"step": 10,
"step_time": 39.2735027824996
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1375,
"completions/max_length": 943.7,
"completions/max_terminated_length": 829.8,
"completions/mean_length": 631.4625,
"completions/mean_terminated_length": 579.1692932128906,
"completions/min_length": 324.2,
"completions/min_terminated_length": 324.2,
"entropy": 1.0276442520320415,
"epoch": 0.16,
"frac_reward_zero_std": 0.25,
"grad_norm": 1.703125,
"kl": 0.08849158070515842,
"learning_rate": 8.8125e-06,
"loss": 0.01496470272541046,
"num_tokens": 142092.0,
"reward": 0.539324951171875,
"reward_std": 0.49008582532405853,
"rewards/JointRewardFunction/mean": 0.539324951171875,
"rewards/JointRewardFunction/std": 0.4900858402252197,
"step": 20,
"step_time": 40.54311619299951
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0125,
"completions/max_length": 863.4,
"completions/max_terminated_length": 835.9,
"completions/mean_length": 554.5875,
"completions/mean_terminated_length": 548.3660766601563,
"completions/min_length": 307.3,
"completions/min_terminated_length": 307.3,
"entropy": 1.1838637091219426,
"epoch": 0.24,
"frac_reward_zero_std": 0.1,
"grad_norm": 3.046875,
"kl": 0.11827877229079604,
"learning_rate": 8.1875e-06,
"loss": 0.009982097893953323,
"num_tokens": 206903.0,
"reward": 0.6125,
"reward_std": 0.4704344987869263,
"rewards/JointRewardFunction/mean": 0.6125,
"rewards/JointRewardFunction/std": 0.47043450474739074,
"step": 30,
"step_time": 37.010521245800916
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025,
"completions/max_length": 742.1,
"completions/max_terminated_length": 659.0,
"completions/mean_length": 425.7,
"completions/mean_terminated_length": 410.8892883300781,
"completions/min_length": 219.2,
"completions/min_terminated_length": 219.2,
"entropy": 1.2652597405016421,
"epoch": 0.32,
"frac_reward_zero_std": 0.15,
"grad_norm": 6.125,
"kl": 0.3052154924720526,
"learning_rate": 7.5625e-06,
"loss": 0.027592796087265014,
"num_tokens": 261533.0,
"reward": 0.61771240234375,
"reward_std": 0.46430147290229795,
"rewards/JointRewardFunction/mean": 0.61771240234375,
"rewards/JointRewardFunction/std": 0.46430149376392366,
"step": 40,
"step_time": 32.063390286398864
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0875,
"completions/max_length": 919.3,
"completions/max_terminated_length": 713.2,
"completions/mean_length": 511.4125,
"completions/mean_terminated_length": 464.44822998046874,
"completions/min_length": 244.6,
"completions/min_terminated_length": 244.6,
"entropy": 1.3154176332056522,
"epoch": 0.4,
"frac_reward_zero_std": 0.1,
"grad_norm": 3.6875,
"kl": 0.24474074998870493,
"learning_rate": 6.9375e-06,
"loss": 0.001818625070154667,
"num_tokens": 323276.0,
"reward": 0.537213134765625,
"reward_std": 0.5033262223005295,
"rewards/JointRewardFunction/mean": 0.537213134765625,
"rewards/JointRewardFunction/std": 0.5033262312412262,
"step": 50,
"step_time": 39.09056931000159
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 824.5,
"completions/max_terminated_length": 711.6,
"completions/mean_length": 520.4625,
"completions/mean_terminated_length": 484.48512268066406,
"completions/min_length": 261.1,
"completions/min_terminated_length": 261.1,
"entropy": 1.2718341693282127,
"epoch": 0.48,
"frac_reward_zero_std": 0.225,
"grad_norm": 4.6875,
"kl": 0.2580411507748067,
"learning_rate": 6.3125e-06,
"loss": 0.014349016547203063,
"num_tokens": 385253.0,
"reward": 0.54111328125,
"reward_std": 0.4758811920881271,
"rewards/JointRewardFunction/mean": 0.54111328125,
"rewards/JointRewardFunction/std": 0.47588120102882386,
"step": 60,
"step_time": 35.274126689498736
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 943.6,
"completions/max_terminated_length": 808.7,
"completions/mean_length": 537.75,
"completions/mean_terminated_length": 502.15358276367186,
"completions/min_length": 267.1,
"completions/min_terminated_length": 267.1,
"entropy": 1.244665590673685,
"epoch": 0.56,
"frac_reward_zero_std": 0.175,
"grad_norm": 4.0625,
"kl": 0.24244523425586523,
"learning_rate": 5.6875e-06,
"loss": 0.0023317448794841766,
"num_tokens": 448729.0,
"reward": 0.69791259765625,
"reward_std": 0.4654367908835411,
"rewards/JointRewardFunction/mean": 0.69791259765625,
"rewards/JointRewardFunction/std": 0.4654367953538895,
"step": 70,
"step_time": 40.008808337500525
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05,
"completions/max_length": 810.1,
"completions/max_terminated_length": 720.2,
"completions/mean_length": 534.1625,
"completions/mean_terminated_length": 508.99286193847655,
"completions/min_length": 286.4,
"completions/min_terminated_length": 286.4,
"entropy": 1.2685907267034053,
"epoch": 0.64,
"frac_reward_zero_std": 0.225,
"grad_norm": 4.125,
"kl": 0.2162147051654756,
"learning_rate": 5.0625e-06,
"loss": 0.014069165289402007,
"num_tokens": 512076.0,
"reward": 0.728045654296875,
"reward_std": 0.5096077308058738,
"rewards/JointRewardFunction/mean": 0.728045654296875,
"rewards/JointRewardFunction/std": 0.5096077516674995,
"step": 80,
"step_time": 34.66793936170143
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025,
"completions/max_length": 793.3,
"completions/max_terminated_length": 733.4,
"completions/mean_length": 492.2,
"completions/mean_terminated_length": 478.558935546875,
"completions/min_length": 245.4,
"completions/min_terminated_length": 245.4,
"entropy": 1.24496211335063,
"epoch": 0.72,
"frac_reward_zero_std": 0.175,
"grad_norm": 4.03125,
"kl": 0.24482853645458819,
"learning_rate": 4.4375e-06,
"loss": 0.032863426208496097,
"num_tokens": 572072.0,
"reward": 0.64200439453125,
"reward_std": 0.5001831084489823,
"rewards/JointRewardFunction/mean": 0.64200439453125,
"rewards/JointRewardFunction/std": 0.5001831203699112,
"step": 90,
"step_time": 34.359502547597
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 768.8,
"completions/max_terminated_length": 768.8,
"completions/mean_length": 475.65,
"completions/mean_terminated_length": 475.65,
"completions/min_length": 258.8,
"completions/min_terminated_length": 258.8,
"entropy": 1.330046895891428,
"epoch": 0.8,
"frac_reward_zero_std": 0.175,
"grad_norm": 3.375,
"kl": 0.24854949009604752,
"learning_rate": 3.8125e-06,
"loss": 0.0744681715965271,
"num_tokens": 630790.0,
"reward": 0.725848388671875,
"reward_std": 0.4486017137765884,
"rewards/JointRewardFunction/mean": 0.725848388671875,
"rewards/JointRewardFunction/std": 0.4486017107963562,
"step": 100,
"step_time": 33.44759687739897
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0125,
"completions/max_length": 695.2,
"completions/max_terminated_length": 669.5,
"completions/mean_length": 443.0125,
"completions/mean_terminated_length": 436.7017883300781,
"completions/min_length": 213.1,
"completions/min_terminated_length": 213.1,
"entropy": 1.3013170935213565,
"epoch": 0.88,
"frac_reward_zero_std": 0.275,
"grad_norm": 3.984375,
"kl": 0.3150750307366252,
"learning_rate": 3.1875e-06,
"loss": 0.020898757874965666,
"num_tokens": 686775.0,
"reward": 0.71988525390625,
"reward_std": 0.515497374534607,
"rewards/JointRewardFunction/mean": 0.71988525390625,
"rewards/JointRewardFunction/std": 0.515497374534607,
"step": 110,
"step_time": 30.408145976099696
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025,
"completions/max_length": 757.0,
"completions/max_terminated_length": 667.6,
"completions/mean_length": 428.675,
"completions/mean_terminated_length": 412.0500061035156,
"completions/min_length": 201.6,
"completions/min_terminated_length": 201.6,
"entropy": 1.2176271453499794,
"epoch": 0.96,
"frac_reward_zero_std": 0.175,
"grad_norm": 6.53125,
"kl": 0.29011352979578076,
"learning_rate": 2.5625e-06,
"loss": 0.023420125246047974,
"num_tokens": 741441.0,
"reward": 0.738653564453125,
"reward_std": 0.45226994156837463,
"rewards/JointRewardFunction/mean": 0.738653564453125,
"rewards/JointRewardFunction/std": 0.45226994901895523,
"step": 120,
"step_time": 32.81503715240033
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 645.8,
"completions/max_terminated_length": 645.8,
"completions/mean_length": 429.1875,
"completions/mean_terminated_length": 429.1875,
"completions/min_length": 218.8,
"completions/min_terminated_length": 218.8,
"entropy": 1.313951000571251,
"epoch": 1.04,
"frac_reward_zero_std": 0.3,
"grad_norm": 3.78125,
"kl": 0.3095056655351073,
"learning_rate": 1.9375e-06,
"loss": 0.031660494208335874,
"num_tokens": 796394.0,
"reward": 0.7450439453125,
"reward_std": 0.46799357831478117,
"rewards/JointRewardFunction/mean": 0.7450439453125,
"rewards/JointRewardFunction/std": 0.46799357831478117,
"step": 130,
"step_time": 28.36433180910135
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 661.5,
"completions/max_terminated_length": 661.5,
"completions/mean_length": 416.2625,
"completions/mean_terminated_length": 416.2625,
"completions/min_length": 235.6,
"completions/min_terminated_length": 235.6,
"entropy": 1.3542070075869561,
"epoch": 1.12,
"frac_reward_zero_std": 0.2,
"grad_norm": 2.328125,
"kl": 0.3334041152149439,
"learning_rate": 1.3125000000000001e-06,
"loss": 0.022475141286849975,
"num_tokens": 850343.0,
"reward": 0.67725830078125,
"reward_std": 0.3997616931796074,
"rewards/JointRewardFunction/mean": 0.67725830078125,
"rewards/JointRewardFunction/std": 0.399761700630188,
"step": 140,
"step_time": 28.77159399340053
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 703.9,
"completions/max_terminated_length": 703.9,
"completions/mean_length": 422.925,
"completions/mean_terminated_length": 422.925,
"completions/min_length": 209.7,
"completions/min_terminated_length": 209.7,
"entropy": 1.2913881182670592,
"epoch": 1.2,
"frac_reward_zero_std": 0.2,
"grad_norm": 3.734375,
"kl": 0.31345505844801663,
"learning_rate": 6.875000000000001e-07,
"loss": 0.029654264450073242,
"num_tokens": 904753.0,
"reward": 0.79158935546875,
"reward_std": 0.4540836468338966,
"rewards/JointRewardFunction/mean": 0.79158935546875,
"rewards/JointRewardFunction/std": 0.45408365726470945,
"step": 150,
"step_time": 30.629617839398996
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 645.8,
"completions/max_terminated_length": 645.8,
"completions/mean_length": 411.0125,
"completions/mean_terminated_length": 411.0125,
"completions/min_length": 207.4,
"completions/min_terminated_length": 207.4,
"entropy": 1.2935365058481694,
"epoch": 1.28,
"frac_reward_zero_std": 0.175,
"grad_norm": 3.171875,
"kl": 0.3413598489947617,
"learning_rate": 6.250000000000001e-08,
"loss": -0.008248078823089599,
"num_tokens": 958536.0,
"reward": 0.695233154296875,
"reward_std": 0.40624782145023347,
"rewards/JointRewardFunction/mean": 0.695233154296875,
"rewards/JointRewardFunction/std": 0.40624783337116244,
"step": 160,
"step_time": 28.084300646101475
}
],
"logging_steps": 10,
"max_steps": 160,
"num_input_tokens_seen": 958536,
"num_train_epochs": 2,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}