87 lines
2.1 KiB
JSON
87 lines
2.1 KiB
JSON
{
|
|
"best_global_step": 300,
|
|
"best_metric": 0.43492111563682556,
|
|
"best_model_checkpoint": "./qwen3_full/checkpoint-300",
|
|
"epoch": 0.07167709468842863,
|
|
"eval_steps": 300,
|
|
"global_step": 300,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.014335418937685725,
|
|
"grad_norm": 1.4510557651519775,
|
|
"learning_rate": 5.9e-05,
|
|
"loss": 1.0194910685221354,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.02867083787537145,
|
|
"grad_norm": 0.7756445407867432,
|
|
"learning_rate": 9.999967846291053e-05,
|
|
"loss": 0.6481282552083333,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.04300625681305718,
|
|
"grad_norm": 0.8747914433479309,
|
|
"learning_rate": 9.999444133533631e-05,
|
|
"loss": 0.6397316614786784,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.0573416757507429,
|
|
"grad_norm": 0.8320423364639282,
|
|
"learning_rate": 9.99827920542705e-05,
|
|
"loss": 0.607297706604004,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.07167709468842863,
|
|
"grad_norm": 0.8628650307655334,
|
|
"learning_rate": 9.996473211381875e-05,
|
|
"loss": 0.5935359954833984,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.07167709468842863,
|
|
"eval_loss": 0.43492111563682556,
|
|
"eval_runtime": 5535.3239,
|
|
"eval_samples_per_second": 5.377,
|
|
"eval_steps_per_second": 5.377,
|
|
"step": 300
|
|
}
|
|
],
|
|
"logging_steps": 60,
|
|
"max_steps": 16744,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 4,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"EarlyStoppingCallback": {
|
|
"args": {
|
|
"early_stopping_patience": 3,
|
|
"early_stopping_threshold": 0.0
|
|
},
|
|
"attributes": {
|
|
"early_stopping_patience_counter": 0
|
|
}
|
|
},
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.0717106863759872e+17,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|