{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9992144540455616, "eval_steps": 500, "global_step": 636, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.031421838177533384, "grad_norm": 0.7421875, "learning_rate": 9.999750543489357e-06, "loss": 1.3149, "step": 10 }, { "epoch": 0.06284367635506677, "grad_norm": 0.453125, "learning_rate": 9.991022178937775e-06, "loss": 0.9124, "step": 20 }, { "epoch": 0.09426551453260015, "grad_norm": 0.99609375, "learning_rate": 9.96984586911221e-06, "loss": 0.8294, "step": 30 }, { "epoch": 0.12568735271013354, "grad_norm": 0.294921875, "learning_rate": 9.936274429154798e-06, "loss": 0.8129, "step": 40 }, { "epoch": 0.15710919088766692, "grad_norm": 0.33984375, "learning_rate": 9.890391588496586e-06, "loss": 0.7844, "step": 50 }, { "epoch": 0.1885310290652003, "grad_norm": 0.283203125, "learning_rate": 9.832311782030674e-06, "loss": 0.7959, "step": 60 }, { "epoch": 0.2199528672427337, "grad_norm": 0.306640625, "learning_rate": 9.762179864703908e-06, "loss": 0.7767, "step": 70 }, { "epoch": 0.2513747054202671, "grad_norm": 0.3828125, "learning_rate": 9.680170750238873e-06, "loss": 0.7704, "step": 80 }, { "epoch": 0.2827965435978005, "grad_norm": 0.3984375, "learning_rate": 9.586488974887312e-06, "loss": 0.7644, "step": 90 }, { "epoch": 0.31421838177533384, "grad_norm": 0.3359375, "learning_rate": 9.48136818730293e-06, "loss": 0.7523, "step": 100 }, { "epoch": 0.34564021995286726, "grad_norm": 0.31640625, "learning_rate": 9.365070565805941e-06, "loss": 0.7683, "step": 110 }, { "epoch": 0.3770620581304006, "grad_norm": 0.2578125, "learning_rate": 9.23788616449268e-06, "loss": 0.7561, "step": 120 }, { "epoch": 0.408483896307934, "grad_norm": 0.36328125, "learning_rate": 9.100132189821156e-06, "loss": 0.7601, "step": 130 }, { "epoch": 0.4399057344854674, "grad_norm": 0.376953125, "learning_rate": 8.952152209476812e-06, "loss": 0.7559, "step": 140 }, { "epoch": 0.4713275726630008, "grad_norm": 0.279296875, "learning_rate": 8.794315295491571e-06, "loss": 0.734, "step": 150 }, { "epoch": 0.5027494108405341, "grad_norm": 0.349609375, "learning_rate": 8.62701510375336e-06, "loss": 0.7567, "step": 160 }, { "epoch": 0.5341712490180676, "grad_norm": 0.1904296875, "learning_rate": 8.450668892201819e-06, "loss": 0.7396, "step": 170 }, { "epoch": 0.565593087195601, "grad_norm": 0.2490234375, "learning_rate": 8.265716480158914e-06, "loss": 0.7424, "step": 180 }, { "epoch": 0.5970149253731343, "grad_norm": 0.25390625, "learning_rate": 8.072619151389951e-06, "loss": 0.7442, "step": 190 }, { "epoch": 0.6284367635506677, "grad_norm": 0.423828125, "learning_rate": 7.8718585036308e-06, "loss": 0.7211, "step": 200 }, { "epoch": 0.6598586017282011, "grad_norm": 0.2041015625, "learning_rate": 7.663935247450733e-06, "loss": 0.743, "step": 210 }, { "epoch": 0.6912804399057345, "grad_norm": 0.322265625, "learning_rate": 7.449367957446566e-06, "loss": 0.7389, "step": 220 }, { "epoch": 0.7227022780832679, "grad_norm": 0.81640625, "learning_rate": 7.2286917788826926e-06, "loss": 0.7329, "step": 230 }, { "epoch": 0.7541241162608012, "grad_norm": 0.416015625, "learning_rate": 7.002457093002785e-06, "loss": 0.7447, "step": 240 }, { "epoch": 0.7855459544383346, "grad_norm": 0.474609375, "learning_rate": 6.77122814434192e-06, "loss": 0.7209, "step": 250 }, { "epoch": 0.816967792615868, "grad_norm": 0.28125, "learning_rate": 6.535581633462728e-06, "loss": 0.7413, "step": 260 }, { "epoch": 0.8483896307934015, "grad_norm": 0.3828125, "learning_rate": 6.296105278625368e-06, "loss": 0.7308, "step": 270 }, { "epoch": 0.8798114689709348, "grad_norm": 0.302734375, "learning_rate": 6.053396349978632e-06, "loss": 0.7302, "step": 280 }, { "epoch": 0.9112333071484682, "grad_norm": 0.7109375, "learning_rate": 5.808060179927983e-06, "loss": 0.7383, "step": 290 }, { "epoch": 0.9426551453260016, "grad_norm": 0.4140625, "learning_rate": 5.560708653395796e-06, "loss": 0.7183, "step": 300 }, { "epoch": 0.974076983503535, "grad_norm": 0.318359375, "learning_rate": 5.3119586817391655e-06, "loss": 0.7379, "step": 310 }, { "epoch": 1.0062843676355067, "grad_norm": 0.236328125, "learning_rate": 5.062430664131463e-06, "loss": 0.7395, "step": 320 }, { "epoch": 1.03770620581304, "grad_norm": 0.31640625, "learning_rate": 4.812746940245032e-06, "loss": 0.7115, "step": 330 }, { "epoch": 1.0691280439905735, "grad_norm": 1.0625, "learning_rate": 4.563530238094161e-06, "loss": 0.7328, "step": 340 }, { "epoch": 1.1005498821681068, "grad_norm": 0.4921875, "learning_rate": 4.315402120909486e-06, "loss": 0.7178, "step": 350 }, { "epoch": 1.1319717203456403, "grad_norm": 0.287109375, "learning_rate": 4.068981436917471e-06, "loss": 0.7277, "step": 360 }, { "epoch": 1.1633935585231736, "grad_norm": 0.2373046875, "learning_rate": 3.8248827758912535e-06, "loss": 0.7299, "step": 370 }, { "epoch": 1.194815396700707, "grad_norm": 0.35546875, "learning_rate": 3.5837149363224045e-06, "loss": 0.7189, "step": 380 }, { "epoch": 1.2262372348782404, "grad_norm": 0.296875, "learning_rate": 3.3460794070364923e-06, "loss": 0.7371, "step": 390 }, { "epoch": 1.2576590730557737, "grad_norm": 0.2734375, "learning_rate": 3.1125688670394682e-06, "loss": 0.7148, "step": 400 }, { "epoch": 1.2890809112333073, "grad_norm": 0.365234375, "learning_rate": 2.8837657073363344e-06, "loss": 0.7275, "step": 410 }, { "epoch": 1.3205027494108406, "grad_norm": 0.5859375, "learning_rate": 2.660240578408776e-06, "loss": 0.7234, "step": 420 }, { "epoch": 1.3519245875883739, "grad_norm": 0.498046875, "learning_rate": 2.442550966974444e-06, "loss": 0.7171, "step": 430 }, { "epoch": 1.3833464257659074, "grad_norm": 0.26171875, "learning_rate": 2.2312398055775496e-06, "loss": 0.7332, "step": 440 }, { "epoch": 1.4147682639434407, "grad_norm": 0.220703125, "learning_rate": 2.0268341184785674e-06, "loss": 0.7167, "step": 450 }, { "epoch": 1.446190102120974, "grad_norm": 0.380859375, "learning_rate": 1.829843707220243e-06, "loss": 0.7235, "step": 460 }, { "epoch": 1.4776119402985075, "grad_norm": 0.2255859375, "learning_rate": 1.6407598791482294e-06, "loss": 0.7253, "step": 470 }, { "epoch": 1.5090337784760408, "grad_norm": 0.359375, "learning_rate": 1.4600542220575038e-06, "loss": 0.7132, "step": 480 }, { "epoch": 1.5404556166535741, "grad_norm": 0.3828125, "learning_rate": 1.2881774280206554e-06, "loss": 0.7309, "step": 490 }, { "epoch": 1.5718774548311076, "grad_norm": 0.4375, "learning_rate": 1.125558169331521e-06, "loss": 0.7232, "step": 500 }, { "epoch": 1.6032992930086412, "grad_norm": 0.2734375, "learning_rate": 9.726020293676308e-07, "loss": 0.7284, "step": 510 }, { "epoch": 1.6347211311861742, "grad_norm": 0.330078125, "learning_rate": 8.296904910379622e-07, "loss": 0.7282, "step": 520 }, { "epoch": 1.6661429693637078, "grad_norm": 1.5078125, "learning_rate": 6.971799853388867e-07, "loss": 0.7159, "step": 530 }, { "epoch": 1.6975648075412413, "grad_norm": 1.1171875, "learning_rate": 5.754010023912693e-07, "loss": 0.7349, "step": 540 }, { "epoch": 1.7289866457187746, "grad_norm": 0.328125, "learning_rate": 4.646572671758687e-07, "loss": 0.7208, "step": 550 }, { "epoch": 1.7604084838963079, "grad_norm": 0.361328125, "learning_rate": 3.6522498202278434e-07, "loss": 0.7253, "step": 560 }, { "epoch": 1.7918303220738414, "grad_norm": 0.283203125, "learning_rate": 2.773521377442523e-07, "loss": 0.7246, "step": 570 }, { "epoch": 1.8232521602513747, "grad_norm": 0.484375, "learning_rate": 2.012578951288724e-07, "loss": 0.7161, "step": 580 }, { "epoch": 1.854673998428908, "grad_norm": 0.298828125, "learning_rate": 1.3713203833984446e-07, "loss": 0.7316, "step": 590 }, { "epoch": 1.8860958366064415, "grad_norm": 0.234375, "learning_rate": 8.513450158049109e-08, "loss": 0.7133, "step": 600 }, { "epoch": 1.9175176747839748, "grad_norm": 0.2353515625, "learning_rate": 4.5394970207591206e-08, "loss": 0.7306, "step": 610 }, { "epoch": 1.9489395129615081, "grad_norm": 1.0390625, "learning_rate": 1.8012557287367394e-08, "loss": 0.722, "step": 620 }, { "epoch": 1.9803613511390417, "grad_norm": 0.408203125, "learning_rate": 3.0555564008305504e-09, "loss": 0.717, "step": 630 } ], "logging_steps": 10, "max_steps": 636, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.042156006349629e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }