{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 91, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.40540540540540543, "grad_norm": 2.9168423719016774, "learning_rate": 1.6000000000000003e-05, "loss": 0.2887, "loss_nan_ranks": 0, "loss_rank_avg": 0.08512411266565323, "step": 5, "valid_targets_mean": 8782.1, "valid_targets_min": 1763 }, { "epoch": 0.8108108108108109, "grad_norm": 0.595660920768862, "learning_rate": 3.6e-05, "loss": 0.2156, "loss_nan_ranks": 0, "loss_rank_avg": 0.06299532949924469, "step": 10, "valid_targets_mean": 5523.5, "valid_targets_min": 718 }, { "epoch": 1.1621621621621623, "grad_norm": 0.37712343506232265, "learning_rate": 3.9759796989536185e-05, "loss": 0.1737, "loss_nan_ranks": 0, "loss_rank_avg": 0.050383515655994415, "step": 15, "valid_targets_mean": 5867.5, "valid_targets_min": 433 }, { "epoch": 1.5675675675675675, "grad_norm": 0.2347575723639623, "learning_rate": 3.879385241571817e-05, "loss": 0.1435, "loss_nan_ranks": 0, "loss_rank_avg": 0.04610014706850052, "step": 20, "valid_targets_mean": 7495.9, "valid_targets_min": 1500 }, { "epoch": 1.972972972972973, "grad_norm": 0.16981735187794836, "learning_rate": 3.7123337990605335e-05, "loss": 0.1272, "loss_nan_ranks": 0, "loss_rank_avg": 0.04025791585445404, "step": 25, "valid_targets_mean": 7071.2, "valid_targets_min": 342 }, { "epoch": 2.3243243243243246, "grad_norm": 0.21607756445297158, "learning_rate": 3.48108802621801e-05, "loss": 0.1199, "loss_nan_ranks": 0, "loss_rank_avg": 0.038428571075201035, "step": 30, "valid_targets_mean": 6605.4, "valid_targets_min": 476 }, { "epoch": 2.72972972972973, "grad_norm": 0.1316432415711279, "learning_rate": 3.194317183405573e-05, "loss": 0.1151, "loss_nan_ranks": 0, "loss_rank_avg": 0.03687749803066254, "step": 35, "valid_targets_mean": 6514.1, "valid_targets_min": 615 }, { "epoch": 3.081081081081081, "grad_norm": 0.1311970853547964, "learning_rate": 2.8627721313625073e-05, "loss": 0.1049, "loss_nan_ranks": 0, "loss_rank_avg": 0.02935028076171875, "step": 40, "valid_targets_mean": 5418.0, "valid_targets_min": 1029 }, { "epoch": 3.4864864864864864, "grad_norm": 0.12539144867417315, "learning_rate": 2.4988822881159627e-05, "loss": 0.1049, "loss_nan_ranks": 0, "loss_rank_avg": 0.03292456641793251, "step": 45, "valid_targets_mean": 6729.9, "valid_targets_min": 1521 }, { "epoch": 3.891891891891892, "grad_norm": 0.13127449105052028, "learning_rate": 2.1162896578209517e-05, "loss": 0.1008, "loss_nan_ranks": 0, "loss_rank_avg": 0.030270356684923172, "step": 50, "valid_targets_mean": 6693.1, "valid_targets_min": 1410 }, { "epoch": 4.243243243243243, "grad_norm": 0.12876211465298387, "learning_rate": 1.7293374004997384e-05, "loss": 0.0981, "loss_nan_ranks": 0, "loss_rank_avg": 0.03473636507987976, "step": 55, "valid_targets_mean": 7737.6, "valid_targets_min": 372 }, { "epoch": 4.648648648648649, "grad_norm": 0.12816877872251659, "learning_rate": 1.3525321158833582e-05, "loss": 0.0957, "loss_nan_ranks": 0, "loss_rank_avg": 0.028843272477388382, "step": 60, "valid_targets_mean": 5910.2, "valid_targets_min": 802 }, { "epoch": 5.0, "grad_norm": 0.23264567962716864, "learning_rate": 1.0000000000000006e-05, "loss": 0.0926, "loss_nan_ranks": 0, "loss_rank_avg": 0.08814641833305359, "step": 65, "valid_targets_mean": 6465.5, "valid_targets_min": 802 }, { "epoch": 5.405405405405405, "grad_norm": 0.13528023785947504, "learning_rate": 6.84957262861873e-06, "loss": 0.0928, "loss_nan_ranks": 0, "loss_rank_avg": 0.036027610301971436, "step": 70, "valid_targets_mean": 7925.5, "valid_targets_min": 2353 }, { "epoch": 5.8108108108108105, "grad_norm": 0.13158309042631827, "learning_rate": 4.1921466096248164e-06, "loss": 0.0914, "loss_nan_ranks": 0, "loss_rank_avg": 0.030884653329849243, "step": 75, "valid_targets_mean": 7174.2, "valid_targets_min": 1482 }, { "epoch": 6.162162162162162, "grad_norm": 0.12264132333565113, "learning_rate": 2.127347193531757e-06, "loss": 0.0936, "loss_nan_ranks": 0, "loss_rank_avg": 0.030903231352567673, "step": 80, "valid_targets_mean": 6811.2, "valid_targets_min": 1310 }, { "epoch": 6.5675675675675675, "grad_norm": 0.11272346517044092, "learning_rate": 7.325824276823934e-07, "loss": 0.0893, "loss_nan_ranks": 0, "loss_rank_avg": 0.03379923850297928, "step": 85, "valid_targets_mean": 7168.5, "valid_targets_min": 615 }, { "epoch": 6.972972972972973, "grad_norm": 0.23203422934347503, "learning_rate": 6.014117664415953e-08, "loss": 0.0919, "loss_nan_ranks": 0, "loss_rank_avg": 0.02850460261106491, "step": 90, "valid_targets_mean": 6414.8, "valid_targets_min": 1121 }, { "epoch": 7.0, "step": 91, "total_flos": 4.487730344563835e+17, "train_loss": 0.0, "train_runtime": 0.9071, "train_samples_per_second": 8967.387, "train_steps_per_second": 100.324 } ], "logging_steps": 5, "max_steps": 91, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.487730344563835e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }