{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6724949562878278, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.026899798251513115, "grad_norm": 0.9497337273894788, "learning_rate": 9.879032258064517e-06, "loss": 0.21184968948364258, "step": 10 }, { "epoch": 0.05379959650302623, "grad_norm": 0.954080750401948, "learning_rate": 9.744623655913979e-06, "loss": 0.21339879035949708, "step": 20 }, { "epoch": 0.08069939475453934, "grad_norm": 0.9097806480768484, "learning_rate": 9.610215053763442e-06, "loss": 0.2068164110183716, "step": 30 }, { "epoch": 0.10759919300605246, "grad_norm": 1.0018783014353256, "learning_rate": 9.475806451612905e-06, "loss": 0.2059403896331787, "step": 40 }, { "epoch": 0.13449899125756556, "grad_norm": 0.9372500482771303, "learning_rate": 9.341397849462367e-06, "loss": 0.19799630641937255, "step": 50 }, { "epoch": 0.16139878950907868, "grad_norm": 0.9509184109653567, "learning_rate": 9.206989247311828e-06, "loss": 0.2008678436279297, "step": 60 }, { "epoch": 0.1882985877605918, "grad_norm": 1.1134282940647546, "learning_rate": 9.072580645161291e-06, "loss": 0.20529050827026368, "step": 70 }, { "epoch": 0.21519838601210492, "grad_norm": 0.9233328708641031, "learning_rate": 8.938172043010753e-06, "loss": 0.2078157901763916, "step": 80 }, { "epoch": 0.242098184263618, "grad_norm": 1.0158369506432157, "learning_rate": 8.803763440860216e-06, "loss": 0.2069852113723755, "step": 90 }, { "epoch": 0.26899798251513113, "grad_norm": 0.9962467911152623, "learning_rate": 8.669354838709677e-06, "loss": 0.19123213291168212, "step": 100 }, { "epoch": 0.29589778076664425, "grad_norm": 0.9050182554717234, "learning_rate": 8.53494623655914e-06, "loss": 0.1998592734336853, "step": 110 }, { "epoch": 0.32279757901815737, "grad_norm": 1.16989042706525, "learning_rate": 8.400537634408604e-06, "loss": 0.19017333984375, "step": 120 }, { "epoch": 0.3496973772696705, "grad_norm": 0.9202065176879053, "learning_rate": 8.266129032258065e-06, "loss": 0.1971895694732666, "step": 130 }, { "epoch": 0.3765971755211836, "grad_norm": 0.9882788749577818, "learning_rate": 8.131720430107529e-06, "loss": 0.20348339080810546, "step": 140 }, { "epoch": 0.4034969737726967, "grad_norm": 0.9666836025801647, "learning_rate": 7.99731182795699e-06, "loss": 0.18486206531524657, "step": 150 }, { "epoch": 0.43039677202420984, "grad_norm": 0.8578034645388888, "learning_rate": 7.862903225806451e-06, "loss": 0.19098201990127564, "step": 160 }, { "epoch": 0.45729657027572296, "grad_norm": 0.9758615729928365, "learning_rate": 7.728494623655915e-06, "loss": 0.18409807682037355, "step": 170 }, { "epoch": 0.484196368527236, "grad_norm": 0.9564910398607444, "learning_rate": 7.594086021505377e-06, "loss": 0.20637857913970947, "step": 180 }, { "epoch": 0.5110961667787491, "grad_norm": 1.09919047792078, "learning_rate": 7.459677419354839e-06, "loss": 0.1931600570678711, "step": 190 }, { "epoch": 0.5379959650302623, "grad_norm": 1.0745145882317149, "learning_rate": 7.325268817204302e-06, "loss": 0.19824893474578859, "step": 200 }, { "epoch": 0.5648957632817754, "grad_norm": 0.9633608114145525, "learning_rate": 7.190860215053764e-06, "loss": 0.2040123701095581, "step": 210 }, { "epoch": 0.5917955615332885, "grad_norm": 1.0540947069171986, "learning_rate": 7.056451612903227e-06, "loss": 0.200044584274292, "step": 220 }, { "epoch": 0.6186953597848016, "grad_norm": 0.9784264211901752, "learning_rate": 6.9220430107526895e-06, "loss": 0.2000995397567749, "step": 230 }, { "epoch": 0.6455951580363147, "grad_norm": 1.1286177734944867, "learning_rate": 6.787634408602151e-06, "loss": 0.1975030779838562, "step": 240 }, { "epoch": 0.6724949562878278, "grad_norm": 1.0140389237975715, "learning_rate": 6.653225806451613e-06, "loss": 0.1995392322540283, "step": 250 } ], "logging_steps": 10, "max_steps": 744, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2257087168512.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }